// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
26 27 #include <stdio.h> 28 #include <float.h> 29 30 #include "test-runner.h" 31 #include "test-utils-a64.h" 32 #include "test-simulator-inputs-a64.h" 33 #include "test-simulator-traces-a64.h" 34 #include "vixl/a64/macro-assembler-a64.h" 35 #include "vixl/a64/simulator-a64.h" 36 37 namespace vixl { 38 39 // ==== Simulator Tests ==== 40 // 41 // These simulator tests check instruction behaviour against a trace taken from 42 // real AArch64 hardware. The same test code is used to generate the trace; the 43 // results are printed to stdout when the test is run with --sim_test_trace. 44 // 45 // The input lists and expected results are stored in test/traces. The expected 46 // results can be regenerated using tools/generate_simulator_traces.py. Adding 47 // a test for a new instruction is described at the top of 48 // test-simulator-traces-a64.h. 49 50 #define __ masm. 51 #define TEST(name) TEST_(SIM_##name) 52 53 #define BUF_SIZE (256) 54 55 #ifdef VIXL_INCLUDE_SIMULATOR 56 57 #define SETUP() \ 58 MacroAssembler masm(BUF_SIZE); \ 59 Decoder decoder; \ 60 Simulator* simulator = Test::run_debugger() ? 
new Debugger(&decoder) \ 61 : new Simulator(&decoder); \ 62 simulator->set_coloured_trace(Test::coloured_trace()); \ 63 simulator->set_instruction_stats(Test::instruction_stats()); \ 64 65 #define START() \ 66 masm.Reset(); \ 67 simulator->ResetState(); \ 68 __ PushCalleeSavedRegisters(); \ 69 if (Test::trace_reg()) { \ 70 __ Trace(LOG_STATE, TRACE_ENABLE); \ 71 } \ 72 if (Test::trace_write()) { \ 73 __ Trace(LOG_WRITE, TRACE_ENABLE); \ 74 } \ 75 if (Test::trace_sim()) { \ 76 __ Trace(LOG_DISASM, TRACE_ENABLE); \ 77 } \ 78 if (Test::instruction_stats()) { \ 79 __ EnableInstrumentation(); \ 80 } 81 82 #define END() \ 83 if (Test::instruction_stats()) { \ 84 __ DisableInstrumentation(); \ 85 } \ 86 __ Trace(LOG_ALL, TRACE_DISABLE); \ 87 __ PopCalleeSavedRegisters(); \ 88 __ Ret(); \ 89 masm.FinalizeCode() 90 91 #define RUN() \ 92 simulator->RunFrom(masm.GetStartAddress<Instruction*>()) 93 94 #define TEARDOWN() \ 95 delete simulator; 96 97 #else // VIXL_INCLUDE_SIMULATOR 98 99 #define SETUP() \ 100 MacroAssembler masm(BUF_SIZE); \ 101 CPU::SetUp() 102 103 #define START() \ 104 masm.Reset(); \ 105 __ PushCalleeSavedRegisters() 106 107 #define END() \ 108 __ PopCalleeSavedRegisters(); \ 109 __ Ret(); \ 110 masm.FinalizeCode() 111 112 #define RUN() \ 113 { \ 114 byte* buffer_start = masm.GetStartAddress<byte*>(); \ 115 size_t buffer_length = masm.CursorOffset(); \ 116 void (*test_function)(void); \ 117 \ 118 CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length); \ 119 VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function)); \ 120 memcpy(&test_function, &buffer_start, sizeof(buffer_start)); \ 121 test_function(); \ 122 } 123 124 #define TEARDOWN() 125 126 #endif // VIXL_INCLUDE_SIMULATOR 127 128 129 // The maximum number of errors to report in detail for each test. 130 static const unsigned kErrorReportLimit = 8; 131 132 133 // Overloaded versions of rawbits_to_double and rawbits_to_float for use in the 134 // templated test functions. 
135 static float rawbits_to_fp(uint32_t bits) { 136 return rawbits_to_float(bits); 137 } 138 139 static double rawbits_to_fp(uint64_t bits) { 140 return rawbits_to_double(bits); 141 } 142 143 144 // MacroAssembler member function pointers to pass to the test dispatchers. 145 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd, 146 const FPRegister& fn); 147 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd, 148 const FPRegister& fn, 149 const FPRegister& fm); 150 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd, 151 const FPRegister& fn, 152 const FPRegister& fm, 153 const FPRegister& fa); 154 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn, 155 const FPRegister& fm); 156 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn, 157 double value); 158 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd, 159 const FPRegister& fn); 160 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd, 161 const FPRegister& fn, 162 int fbits); 163 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd, 164 const Register& rn, 165 int fbits); 166 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be 167 // consolidated into one routine. 168 typedef void (MacroAssembler::*Test1OpNEONHelper_t)( 169 const VRegister& vd, const VRegister& vn); 170 typedef void (MacroAssembler::*Test2OpNEONHelper_t)( 171 const VRegister& vd, const VRegister& vn, const VRegister& vm); 172 typedef void (MacroAssembler::*TestByElementNEONHelper_t)( 173 const VRegister& vd, const VRegister& vn, const VRegister& vm, int vm_index); 174 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)( 175 const VRegister& vd, int imm1, const VRegister& vn, int imm2); 176 177 // This helps using the same typename for both the function pointer 178 // and the array of immediates passed to helper routines. 
179 template <typename T> 180 class Test2OpImmediateNEONHelper_t { 181 public: 182 typedef void (MacroAssembler::*mnemonic)( 183 const VRegister& vd, const VRegister& vn, T imm); 184 }; 185 186 187 // Maximum number of hex characters required to represent values of either 188 // templated type. 189 template <typename Ta, typename Tb> 190 static unsigned MaxHexCharCount() { 191 unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb))); 192 return (count * 8) / 4; 193 } 194 195 196 // Standard test dispatchers. 197 198 199 static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs, 200 unsigned inputs_length, uintptr_t results, 201 unsigned d_size, unsigned n_size) { 202 VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize)); 203 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize)); 204 205 SETUP(); 206 START(); 207 208 // Roll up the loop to keep the code size down. 209 Label loop_n; 210 211 Register out = x0; 212 Register inputs_base = x1; 213 Register length = w2; 214 Register index_n = w3; 215 216 const int n_index_shift = 217 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 218 219 FPRegister fd = (d_size == kDRegSize) ? d0 : s0; 220 FPRegister fn = (n_size == kDRegSize) ? d1 : s1; 221 222 __ Mov(out, results); 223 __ Mov(inputs_base, inputs); 224 __ Mov(length, inputs_length); 225 226 __ Mov(index_n, 0); 227 __ Bind(&loop_n); 228 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift)); 229 230 { 231 SingleEmissionCheckScope guard(&masm); 232 (masm.*helper)(fd, fn); 233 } 234 __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex)); 235 236 __ Add(index_n, index_n, 1); 237 __ Cmp(index_n, inputs_length); 238 __ B(lo, &loop_n); 239 240 END(); 241 RUN(); 242 TEARDOWN(); 243 } 244 245 246 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 247 // rawbits representations of doubles or floats. This ensures that exact bit 248 // comparisons can be performed. 
249 template <typename Tn, typename Td> 250 static void Test1Op(const char * name, Test1OpFPHelper_t helper, 251 const Tn inputs[], unsigned inputs_length, 252 const Td expected[], unsigned expected_length) { 253 VIXL_ASSERT(inputs_length > 0); 254 255 const unsigned results_length = inputs_length; 256 Td * results = new Td[results_length]; 257 258 const unsigned d_bits = sizeof(Td) * 8; 259 const unsigned n_bits = sizeof(Tn) * 8; 260 261 Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, 262 reinterpret_cast<uintptr_t>(results), d_bits, n_bits); 263 264 if (Test::sim_test_trace()) { 265 // Print the results. 266 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name); 267 for (unsigned d = 0; d < results_length; d++) { 268 printf(" 0x%0*" PRIx64 ",\n", 269 d_bits / 4, static_cast<uint64_t>(results[d])); 270 } 271 printf("};\n"); 272 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 273 } else { 274 // Check the results. 275 VIXL_CHECK(expected_length == results_length); 276 unsigned error_count = 0; 277 unsigned d = 0; 278 for (unsigned n = 0; n < inputs_length; n++, d++) { 279 if (results[d] != expected[d]) { 280 if (++error_count > kErrorReportLimit) continue; 281 282 printf("%s 0x%0*" PRIx64 " (%s %g):\n", 283 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), 284 name, rawbits_to_fp(inputs[n])); 285 printf(" Expected: 0x%0*" PRIx64 " (%g)\n", 286 d_bits / 4, static_cast<uint64_t>(expected[d]), 287 rawbits_to_fp(expected[d])); 288 printf(" Found: 0x%0*" PRIx64 " (%g)\n", 289 d_bits / 4, static_cast<uint64_t>(results[d]), 290 rawbits_to_fp(results[d])); 291 printf("\n"); 292 } 293 } 294 VIXL_ASSERT(d == expected_length); 295 if (error_count > kErrorReportLimit) { 296 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 297 } 298 VIXL_CHECK(error_count == 0); 299 } 300 delete[] results; 301 } 302 303 304 static void Test2Op_Helper(Test2OpFPHelper_t helper, 305 uintptr_t inputs, unsigned 
inputs_length, 306 uintptr_t results, unsigned reg_size) { 307 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); 308 309 SETUP(); 310 START(); 311 312 // Roll up the loop to keep the code size down. 313 Label loop_n, loop_m; 314 315 Register out = x0; 316 Register inputs_base = x1; 317 Register length = w2; 318 Register index_n = w3; 319 Register index_m = w4; 320 321 bool double_op = reg_size == kDRegSize; 322 const int index_shift = 323 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 324 325 FPRegister fd = double_op ? d0 : s0; 326 FPRegister fn = double_op ? d1 : s1; 327 FPRegister fm = double_op ? d2 : s2; 328 329 __ Mov(out, results); 330 __ Mov(inputs_base, inputs); 331 __ Mov(length, inputs_length); 332 333 __ Mov(index_n, 0); 334 __ Bind(&loop_n); 335 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 336 337 __ Mov(index_m, 0); 338 __ Bind(&loop_m); 339 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift)); 340 341 { 342 SingleEmissionCheckScope guard(&masm); 343 (masm.*helper)(fd, fn, fm); 344 } 345 __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex)); 346 347 __ Add(index_m, index_m, 1); 348 __ Cmp(index_m, inputs_length); 349 __ B(lo, &loop_m); 350 351 __ Add(index_n, index_n, 1); 352 __ Cmp(index_n, inputs_length); 353 __ B(lo, &loop_n); 354 355 END(); 356 RUN(); 357 TEARDOWN(); 358 } 359 360 361 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 362 // rawbits representations of doubles or floats. This ensures that exact bit 363 // comparisons can be performed. 
364 template <typename T> 365 static void Test2Op(const char * name, Test2OpFPHelper_t helper, 366 const T inputs[], unsigned inputs_length, 367 const T expected[], unsigned expected_length) { 368 VIXL_ASSERT(inputs_length > 0); 369 370 const unsigned results_length = inputs_length * inputs_length; 371 T * results = new T[results_length]; 372 373 const unsigned bits = sizeof(T) * 8; 374 375 Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, 376 reinterpret_cast<uintptr_t>(results), bits); 377 378 if (Test::sim_test_trace()) { 379 // Print the results. 380 printf("const uint%u_t kExpected_%s[] = {\n", bits, name); 381 for (unsigned d = 0; d < results_length; d++) { 382 printf(" 0x%0*" PRIx64 ",\n", 383 bits / 4, static_cast<uint64_t>(results[d])); 384 } 385 printf("};\n"); 386 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 387 } else { 388 // Check the results. 389 VIXL_CHECK(expected_length == results_length); 390 unsigned error_count = 0; 391 unsigned d = 0; 392 for (unsigned n = 0; n < inputs_length; n++) { 393 for (unsigned m = 0; m < inputs_length; m++, d++) { 394 if (results[d] != expected[d]) { 395 if (++error_count > kErrorReportLimit) continue; 396 397 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", 398 name, 399 bits / 4, static_cast<uint64_t>(inputs[n]), 400 bits / 4, static_cast<uint64_t>(inputs[m]), 401 name, 402 rawbits_to_fp(inputs[n]), 403 rawbits_to_fp(inputs[m])); 404 printf(" Expected: 0x%0*" PRIx64 " (%g)\n", 405 bits / 4, static_cast<uint64_t>(expected[d]), 406 rawbits_to_fp(expected[d])); 407 printf(" Found: 0x%0*" PRIx64 " (%g)\n", 408 bits / 4, static_cast<uint64_t>(results[d]), 409 rawbits_to_fp(results[d])); 410 printf("\n"); 411 } 412 } 413 } 414 VIXL_ASSERT(d == expected_length); 415 if (error_count > kErrorReportLimit) { 416 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 417 } 418 VIXL_CHECK(error_count == 0); 419 } 420 delete[] results; 421 } 422 423 
424 static void Test3Op_Helper(Test3OpFPHelper_t helper, 425 uintptr_t inputs, unsigned inputs_length, 426 uintptr_t results, unsigned reg_size) { 427 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); 428 429 SETUP(); 430 START(); 431 432 // Roll up the loop to keep the code size down. 433 Label loop_n, loop_m, loop_a; 434 435 Register out = x0; 436 Register inputs_base = x1; 437 Register length = w2; 438 Register index_n = w3; 439 Register index_m = w4; 440 Register index_a = w5; 441 442 bool double_op = reg_size == kDRegSize; 443 const int index_shift = 444 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 445 446 FPRegister fd = double_op ? d0 : s0; 447 FPRegister fn = double_op ? d1 : s1; 448 FPRegister fm = double_op ? d2 : s2; 449 FPRegister fa = double_op ? d3 : s3; 450 451 __ Mov(out, results); 452 __ Mov(inputs_base, inputs); 453 __ Mov(length, inputs_length); 454 455 __ Mov(index_n, 0); 456 __ Bind(&loop_n); 457 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 458 459 __ Mov(index_m, 0); 460 __ Bind(&loop_m); 461 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift)); 462 463 __ Mov(index_a, 0); 464 __ Bind(&loop_a); 465 __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift)); 466 467 { 468 SingleEmissionCheckScope guard(&masm); 469 (masm.*helper)(fd, fn, fm, fa); 470 } 471 __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex)); 472 473 __ Add(index_a, index_a, 1); 474 __ Cmp(index_a, inputs_length); 475 __ B(lo, &loop_a); 476 477 __ Add(index_m, index_m, 1); 478 __ Cmp(index_m, inputs_length); 479 __ B(lo, &loop_m); 480 481 __ Add(index_n, index_n, 1); 482 __ Cmp(index_n, inputs_length); 483 __ B(lo, &loop_n); 484 485 END(); 486 RUN(); 487 TEARDOWN(); 488 } 489 490 491 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 492 // rawbits representations of doubles or floats. This ensures that exact bit 493 // comparisons can be performed. 
494 template <typename T> 495 static void Test3Op(const char * name, Test3OpFPHelper_t helper, 496 const T inputs[], unsigned inputs_length, 497 const T expected[], unsigned expected_length) { 498 VIXL_ASSERT(inputs_length > 0); 499 500 const unsigned results_length = inputs_length * inputs_length * inputs_length; 501 T * results = new T[results_length]; 502 503 const unsigned bits = sizeof(T) * 8; 504 505 Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, 506 reinterpret_cast<uintptr_t>(results), bits); 507 508 if (Test::sim_test_trace()) { 509 // Print the results. 510 printf("const uint%u_t kExpected_%s[] = {\n", bits, name); 511 for (unsigned d = 0; d < results_length; d++) { 512 printf(" 0x%0*" PRIx64 ",\n", 513 bits / 4, static_cast<uint64_t>(results[d])); 514 } 515 printf("};\n"); 516 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 517 } else { 518 // Check the results. 519 VIXL_CHECK(expected_length == results_length); 520 unsigned error_count = 0; 521 unsigned d = 0; 522 for (unsigned n = 0; n < inputs_length; n++) { 523 for (unsigned m = 0; m < inputs_length; m++) { 524 for (unsigned a = 0; a < inputs_length; a++, d++) { 525 if (results[d] != expected[d]) { 526 if (++error_count > kErrorReportLimit) continue; 527 528 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64 529 " (%s %g %g %g):\n", 530 name, 531 bits / 4, static_cast<uint64_t>(inputs[n]), 532 bits / 4, static_cast<uint64_t>(inputs[m]), 533 bits / 4, static_cast<uint64_t>(inputs[a]), 534 name, 535 rawbits_to_fp(inputs[n]), 536 rawbits_to_fp(inputs[m]), 537 rawbits_to_fp(inputs[a])); 538 printf(" Expected: 0x%0*" PRIx64 " (%g)\n", 539 bits / 4, static_cast<uint64_t>(expected[d]), 540 rawbits_to_fp(expected[d])); 541 printf(" Found: 0x%0*" PRIx64 " (%g)\n", 542 bits / 4, static_cast<uint64_t>(results[d]), 543 rawbits_to_fp(results[d])); 544 printf("\n"); 545 } 546 } 547 } 548 } 549 VIXL_ASSERT(d == expected_length); 550 if (error_count > 
kErrorReportLimit) { 551 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 552 } 553 VIXL_CHECK(error_count == 0); 554 } 555 delete[] results; 556 } 557 558 559 static void TestCmp_Helper(TestFPCmpHelper_t helper, 560 uintptr_t inputs, unsigned inputs_length, 561 uintptr_t results, unsigned reg_size) { 562 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); 563 564 SETUP(); 565 START(); 566 567 // Roll up the loop to keep the code size down. 568 Label loop_n, loop_m; 569 570 Register out = x0; 571 Register inputs_base = x1; 572 Register length = w2; 573 Register index_n = w3; 574 Register index_m = w4; 575 Register flags = x5; 576 577 bool double_op = reg_size == kDRegSize; 578 const int index_shift = 579 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 580 581 FPRegister fn = double_op ? d1 : s1; 582 FPRegister fm = double_op ? d2 : s2; 583 584 __ Mov(out, results); 585 __ Mov(inputs_base, inputs); 586 __ Mov(length, inputs_length); 587 588 __ Mov(index_n, 0); 589 __ Bind(&loop_n); 590 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 591 592 __ Mov(index_m, 0); 593 __ Bind(&loop_m); 594 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift)); 595 596 { 597 SingleEmissionCheckScope guard(&masm); 598 (masm.*helper)(fn, fm); 599 } 600 __ Mrs(flags, NZCV); 601 __ Ubfx(flags, flags, 28, 4); 602 __ Strb(flags, MemOperand(out, 1, PostIndex)); 603 604 __ Add(index_m, index_m, 1); 605 __ Cmp(index_m, inputs_length); 606 __ B(lo, &loop_m); 607 608 __ Add(index_n, index_n, 1); 609 __ Cmp(index_n, inputs_length); 610 __ B(lo, &loop_n); 611 612 END(); 613 RUN(); 614 TEARDOWN(); 615 } 616 617 618 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 619 // rawbits representations of doubles or floats. This ensures that exact bit 620 // comparisons can be performed. 
621 template <typename T> 622 static void TestCmp(const char * name, TestFPCmpHelper_t helper, 623 const T inputs[], unsigned inputs_length, 624 const uint8_t expected[], unsigned expected_length) { 625 VIXL_ASSERT(inputs_length > 0); 626 627 const unsigned results_length = inputs_length * inputs_length; 628 uint8_t * results = new uint8_t[results_length]; 629 630 const unsigned bits = sizeof(T) * 8; 631 632 TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, 633 reinterpret_cast<uintptr_t>(results), bits); 634 635 if (Test::sim_test_trace()) { 636 // Print the results. 637 printf("const uint8_t kExpected_%s[] = {\n", name); 638 for (unsigned d = 0; d < results_length; d++) { 639 // Each NZCV result only requires 4 bits. 640 VIXL_ASSERT((results[d] & 0xf) == results[d]); 641 printf(" 0x%" PRIx8 ",\n", results[d]); 642 } 643 printf("};\n"); 644 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 645 } else { 646 // Check the results. 647 VIXL_CHECK(expected_length == results_length); 648 unsigned error_count = 0; 649 unsigned d = 0; 650 for (unsigned n = 0; n < inputs_length; n++) { 651 for (unsigned m = 0; m < inputs_length; m++, d++) { 652 if (results[d] != expected[d]) { 653 if (++error_count > kErrorReportLimit) continue; 654 655 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", 656 name, 657 bits / 4, static_cast<uint64_t>(inputs[n]), 658 bits / 4, static_cast<uint64_t>(inputs[m]), 659 name, 660 rawbits_to_fp(inputs[n]), 661 rawbits_to_fp(inputs[m])); 662 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n", 663 (expected[d] & 0x8) ? 'N' : 'n', 664 (expected[d] & 0x4) ? 'Z' : 'z', 665 (expected[d] & 0x2) ? 'C' : 'c', 666 (expected[d] & 0x1) ? 'V' : 'v', 667 expected[d]); 668 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n", 669 (results[d] & 0x8) ? 'N' : 'n', 670 (results[d] & 0x4) ? 'Z' : 'z', 671 (results[d] & 0x2) ? 'C' : 'c', 672 (results[d] & 0x1) ? 
'V' : 'v', 673 results[d]); 674 printf("\n"); 675 } 676 } 677 } 678 VIXL_ASSERT(d == expected_length); 679 if (error_count > kErrorReportLimit) { 680 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 681 } 682 VIXL_CHECK(error_count == 0); 683 } 684 delete[] results; 685 } 686 687 688 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper, 689 uintptr_t inputs, unsigned inputs_length, 690 uintptr_t results, unsigned reg_size) { 691 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); 692 693 SETUP(); 694 START(); 695 696 // Roll up the loop to keep the code size down. 697 Label loop_n, loop_m; 698 699 Register out = x0; 700 Register inputs_base = x1; 701 Register length = w2; 702 Register index_n = w3; 703 Register flags = x4; 704 705 bool double_op = reg_size == kDRegSize; 706 const int index_shift = 707 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 708 709 FPRegister fn = double_op ? d1 : s1; 710 711 __ Mov(out, results); 712 __ Mov(inputs_base, inputs); 713 __ Mov(length, inputs_length); 714 715 __ Mov(index_n, 0); 716 __ Bind(&loop_n); 717 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 718 719 { 720 SingleEmissionCheckScope guard(&masm); 721 (masm.*helper)(fn, 0.0); 722 } 723 __ Mrs(flags, NZCV); 724 __ Ubfx(flags, flags, 28, 4); 725 __ Strb(flags, MemOperand(out, 1, PostIndex)); 726 727 __ Add(index_n, index_n, 1); 728 __ Cmp(index_n, inputs_length); 729 __ B(lo, &loop_n); 730 731 END(); 732 RUN(); 733 TEARDOWN(); 734 } 735 736 737 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 738 // rawbits representations of doubles or floats. This ensures that exact bit 739 // comparisons can be performed. 
740 template <typename T> 741 static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper, 742 const T inputs[], unsigned inputs_length, 743 const uint8_t expected[], unsigned expected_length) { 744 VIXL_ASSERT(inputs_length > 0); 745 746 const unsigned results_length = inputs_length; 747 uint8_t * results = new uint8_t[results_length]; 748 749 const unsigned bits = sizeof(T) * 8; 750 751 TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, 752 reinterpret_cast<uintptr_t>(results), bits); 753 754 if (Test::sim_test_trace()) { 755 // Print the results. 756 printf("const uint8_t kExpected_%s[] = {\n", name); 757 for (unsigned d = 0; d < results_length; d++) { 758 // Each NZCV result only requires 4 bits. 759 VIXL_ASSERT((results[d] & 0xf) == results[d]); 760 printf(" 0x%" PRIx8 ",\n", results[d]); 761 } 762 printf("};\n"); 763 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 764 } else { 765 // Check the results. 766 VIXL_CHECK(expected_length == results_length); 767 unsigned error_count = 0; 768 unsigned d = 0; 769 for (unsigned n = 0; n < inputs_length; n++, d++) { 770 if (results[d] != expected[d]) { 771 if (++error_count > kErrorReportLimit) continue; 772 773 printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n", 774 name, 775 bits / 4, static_cast<uint64_t>(inputs[n]), 776 bits / 4, 0, 777 name, 778 rawbits_to_fp(inputs[n])); 779 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n", 780 (expected[d] & 0x8) ? 'N' : 'n', 781 (expected[d] & 0x4) ? 'Z' : 'z', 782 (expected[d] & 0x2) ? 'C' : 'c', 783 (expected[d] & 0x1) ? 'V' : 'v', 784 expected[d]); 785 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n", 786 (results[d] & 0x8) ? 'N' : 'n', 787 (results[d] & 0x4) ? 'Z' : 'z', 788 (results[d] & 0x2) ? 'C' : 'c', 789 (results[d] & 0x1) ? 
'V' : 'v', 790 results[d]); 791 printf("\n"); 792 } 793 } 794 VIXL_ASSERT(d == expected_length); 795 if (error_count > kErrorReportLimit) { 796 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 797 } 798 VIXL_CHECK(error_count == 0); 799 } 800 delete[] results; 801 } 802 803 804 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper, 805 uintptr_t inputs, unsigned inputs_length, 806 uintptr_t results, 807 unsigned d_size, unsigned n_size) { 808 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize)); 809 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize)); 810 811 SETUP(); 812 START(); 813 814 // Roll up the loop to keep the code size down. 815 Label loop_n; 816 817 Register out = x0; 818 Register inputs_base = x1; 819 Register length = w2; 820 Register index_n = w3; 821 822 const int n_index_shift = 823 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 824 825 Register rd = (d_size == kXRegSize) ? x10 : w10; 826 FPRegister fn = (n_size == kDRegSize) ? 
d1 : s1; 827 828 __ Mov(out, results); 829 __ Mov(inputs_base, inputs); 830 __ Mov(length, inputs_length); 831 832 __ Mov(index_n, 0); 833 __ Bind(&loop_n); 834 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift)); 835 836 for (unsigned fbits = 0; fbits <= d_size; ++fbits) { 837 { 838 SingleEmissionCheckScope guard(&masm); 839 (masm.*helper)(rd, fn, fbits); 840 } 841 __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex)); 842 } 843 844 __ Add(index_n, index_n, 1); 845 __ Cmp(index_n, inputs_length); 846 __ B(lo, &loop_n); 847 848 END(); 849 RUN(); 850 TEARDOWN(); 851 } 852 853 854 static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs, 855 unsigned inputs_length, uintptr_t results, 856 unsigned d_size, unsigned n_size) { 857 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize)); 858 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize)); 859 860 SETUP(); 861 START(); 862 863 // Roll up the loop to keep the code size down. 864 Label loop_n; 865 866 Register out = x0; 867 Register inputs_base = x1; 868 Register length = w2; 869 Register index_n = w3; 870 871 const int n_index_shift = 872 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 873 874 Register rd = (d_size == kXRegSize) ? x10 : w10; 875 FPRegister fn = (n_size == kDRegSize) ? d1 : s1; 876 877 __ Mov(out, results); 878 __ Mov(inputs_base, inputs); 879 __ Mov(length, inputs_length); 880 881 __ Mov(index_n, 0); 882 __ Bind(&loop_n); 883 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift)); 884 885 { 886 SingleEmissionCheckScope guard(&masm); 887 (masm.*helper)(rd, fn); 888 } 889 __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex)); 890 891 __ Add(index_n, index_n, 1); 892 __ Cmp(index_n, inputs_length); 893 __ B(lo, &loop_n); 894 895 END(); 896 RUN(); 897 TEARDOWN(); 898 } 899 900 901 // Test FP instructions. 902 // - The inputs[] array should be an array of rawbits representations of 903 // doubles or floats. 
// This ensures that exact bit comparisons can be
// performed.
//  - The expected[] array should be an array of signed integers.
//
// Runs 'helper' over every input and either prints a machine-readable trace
// (--sim_test_trace mode, consumed by tools/generate_simulator_traces.py) or
// checks the results against expected[].
template <typename Tn, typename Td>
static void TestFPToS(const char * name, TestFPToIntHelper_t helper,
                      const Tn inputs[], unsigned inputs_length,
                      const Td expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One conversion result per input.
  const unsigned results_length = inputs_length;
  Td * results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  // NOTE(review): TestFPToInt_Helper is not visible in this chunk; it is
  // expected to fill results[] with one Td value per input.
  TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                     reinterpret_cast<uintptr_t>(results), d_bits, n_bits);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    // There is no simple C++ literal for INT*_MIN that doesn't produce
    // warnings, so we use an appropriate constant in that case instead.
    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
    // the like) avoids warnings about comparing values with differing ranges.
    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
    const int64_t int_d_min = -(int_d_max) - 1;
    for (unsigned d = 0; d < results_length; d++) {
      if (results[d] == int_d_min) {
        // Emit INT*_MIN as "-INT*_C(max) - 1" to avoid literal-overflow
        // warnings; negating int_d_min directly would overflow for 64 bits.
        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
      } else {
        // Some constants (such as those between INT32_MAX and UINT32_MAX)
        // trigger compiler warnings. To avoid these warnings, use an
        // appropriate macro to make the type explicit.
        int64_t result_int64 = static_cast<int64_t>(results[d]);
        if (result_int64 >= 0) {
          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
        } else {
          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
        }
      }
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Only print the first kErrorReportLimit mismatches in full; the
        // remainder are just counted and summarised below.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
               name, rawbits_to_fp(inputs[n]));
        printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4, static_cast<uint64_t>(expected[d]),
               static_cast<int64_t>(expected[d]));
        printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4, static_cast<uint64_t>(results[d]),
               static_cast<int64_t>(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Test FP instructions.
//  - The inputs[] array should be an array of rawbits representations of
//    doubles or floats. This ensures that exact bit comparisons can be
//    performed.
//  - The expected[] array should be an array of unsigned integers.
//
// Identical structure to TestFPToS above, but for unsigned destinations, so
// no INT*_MIN special case is needed when printing the trace.
template <typename Tn, typename Td>
static void TestFPToU(const char * name, TestFPToIntHelper_t helper,
                      const Tn inputs[], unsigned inputs_length,
                      const Td expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One conversion result per input.
  const unsigned results_length = inputs_length;
  Td * results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  TestFPToInt_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs), inputs_length,
                     reinterpret_cast<uintptr_t>(results), d_bits, n_bits);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      // The 'u' suffix keeps large constants unsigned in the generated file.
      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Only print the first kErrorReportLimit mismatches in full.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
               name, rawbits_to_fp(inputs[n]));
        printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
               d_bits / 4, static_cast<uint64_t>(expected[d]),
               static_cast<uint64_t>(expected[d]));
        printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
               d_bits / 4, static_cast<uint64_t>(results[d]),
               static_cast<uint64_t>(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Test FP instructions.
//  - The inputs[] array should be an array of rawbits representations of
//    doubles or floats. This ensures that exact bit comparisons can be
//    performed.
//  - The expected[] array should be an array of signed integers.
1043 template <typename Tn, typename Td> 1044 static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper, 1045 const Tn inputs[], unsigned inputs_length, 1046 const Td expected[], unsigned expected_length) { 1047 VIXL_ASSERT(inputs_length > 0); 1048 1049 const unsigned d_bits = sizeof(Td) * 8; 1050 const unsigned n_bits = sizeof(Tn) * 8; 1051 1052 const unsigned results_length = inputs_length * (d_bits + 1); 1053 Td * results = new Td[results_length]; 1054 1055 TestFPToFixed_Helper(helper, 1056 reinterpret_cast<uintptr_t>(inputs), inputs_length, 1057 reinterpret_cast<uintptr_t>(results), d_bits, n_bits); 1058 1059 if (Test::sim_test_trace()) { 1060 // Print the results. 1061 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name); 1062 // There is no simple C++ literal for INT*_MIN that doesn't produce 1063 // warnings, so we use an appropriate constant in that case instead. 1064 // Deriving int_d_min in this way (rather than just checking INT64_MIN and 1065 // the like) avoids warnings about comparing values with differing ranges. 1066 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1; 1067 const int64_t int_d_min = -(int_d_max) - 1; 1068 for (unsigned d = 0; d < results_length; d++) { 1069 if (results[d] == int_d_min) { 1070 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max); 1071 } else { 1072 // Some constants (such as those between INT32_MAX and UINT32_MAX) 1073 // trigger compiler warnings. To avoid these warnings, use an 1074 // appropriate macro to make the type explicit. 1075 int64_t result_int64 = static_cast<int64_t>(results[d]); 1076 if (result_int64 >= 0) { 1077 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64); 1078 } else { 1079 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64); 1080 } 1081 } 1082 } 1083 printf("};\n"); 1084 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1085 } else { 1086 // Check the results. 
1087 VIXL_CHECK(expected_length == results_length); 1088 unsigned error_count = 0; 1089 unsigned d = 0; 1090 for (unsigned n = 0; n < inputs_length; n++) { 1091 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) { 1092 if (results[d] != expected[d]) { 1093 if (++error_count > kErrorReportLimit) continue; 1094 1095 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n", 1096 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits, 1097 name, rawbits_to_fp(inputs[n]), fbits); 1098 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n", 1099 d_bits / 4, static_cast<uint64_t>(expected[d]), 1100 static_cast<int64_t>(expected[d])); 1101 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n", 1102 d_bits / 4, static_cast<uint64_t>(results[d]), 1103 static_cast<int64_t>(results[d])); 1104 printf("\n"); 1105 } 1106 } 1107 } 1108 VIXL_ASSERT(d == expected_length); 1109 if (error_count > kErrorReportLimit) { 1110 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1111 } 1112 VIXL_CHECK(error_count == 0); 1113 } 1114 delete[] results; 1115 } 1116 1117 1118 // Test FP instructions. 1119 // - The inputs[] array should be an array of rawbits representations of 1120 // doubles or floats. This ensures that exact bit comparisons can be 1121 // performed. 1122 // - The expected[] array should be an array of unsigned integers. 
// Unsigned counterpart of TestFPToFixedS above: runs 'helper' once per input
// per fbits value in [0, d_bits], then prints a trace or checks expected[].
template <typename Tn, typename Td>
static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper,
                           const Tn inputs[], unsigned inputs_length,
                           const Td expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  // (d_bits + 1) results per input: one for each fbits in [0, d_bits].
  const unsigned results_length = inputs_length * (d_bits + 1);
  Td * results = new Td[results_length];

  TestFPToFixed_Helper(helper,
                       reinterpret_cast<uintptr_t>(inputs), inputs_length,
                       reinterpret_cast<uintptr_t>(results), d_bits, n_bits);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      // The 'u' suffix keeps large constants unsigned in the generated file.
      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
        if (results[d] != expected[d]) {
          // Only print the first kErrorReportLimit mismatches in full.
          if (++error_count > kErrorReportLimit) continue;

          // NOTE(review): 'fbits' is unsigned but printed with '%d'; '%u'
          // would be the matching specifier (values here are <= 64, so the
          // output is the same in practice).
          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
                 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
                 name, rawbits_to_fp(inputs[n]), fbits);
          printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
                 d_bits / 4, static_cast<uint64_t>(expected[d]),
                 static_cast<uint64_t>(expected[d]));
          printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
                 d_bits / 4, static_cast<uint64_t>(results[d]),
                 static_cast<uint64_t>(results[d]));
          printf("\n");
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg. ====


// Generates and runs code that applies the one-operand NEON instruction
// emitted by 'helper' to a rolling window over inputs_n, storing one full
// destination register per iteration into 'results'.
static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
                               uintptr_t inputs_n, unsigned inputs_n_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  // Prime vn with the last 16 bytes of the input array; each loop iteration
  // shifts one new lane in via Ext, so vn is a sliding window over inputs_n.
  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load the next input lane and rotate it into the low end of vn.
  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero.
  // TODO: Setting the destination to values other than zero
  //       might be a better test for instructions such as sqxtn2
  //       which may leave parts of V registers unchanged.
  __ Movi(vd.V16B(), 0);

  {
    // Each helper call must emit exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd_helper, vn_helper);
  }
  __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper,
                        const Tn inputs_n[], unsigned inputs_n_length,
                        const Td expected[], unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One destination vector (vd_lane_count lanes) per input iteration.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  Test1OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_n),
                     inputs_n_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form, vn_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf("  ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Used to pad the table columns up to the hex width of a lane.
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned output_index = (n * vd_lane_count) + lane;

        if (results[output_index] != expected[output_index]) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex+1, padding,
               lane_len_in_hex+1, padding);

        // Index of the oldest lane still present in the sliding vn window
        // (see the Ext-based rotation in Test1OpNEON_Helper).
        const unsigned first_index_n =
            inputs_n_length - (16 / vn_lane_bytes) + n + 1;

        for (unsigned lane = 0;
             lane < std::max(vd_lane_count, vn_lane_count);
             lane++) {
          unsigned output_index = (n * vd_lane_count) + lane;
          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;

          // '*' flags the lane(s) that actually mismatched.
          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                 "| 0x%0*" PRIx64 "\n",
                 results[output_index] != expected[output_index] ? '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[output_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expected[output_index]));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
//      where <V> is one of B, H, S or D registers.
//      e.g. saddlv H1, v0.8B

// TODO: Change tests to store all lanes of the resulting V register.
//       Some tests store all 128 bits of the resulting V register to
//       check the simulator's behaviour on the rest of the register.
//       This is better than storing the affected lanes only.
//       Change any tests such as the 'Across' template to do the same.

// Generates and runs code for across-lanes instructions (vector in, scalar
// out), analogous to Test1OpNEON_Helper but sliding the input window by whole
// vectors rather than by 16 bytes.
static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_vector = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(3, vn_bits);

  // These will have the correct format for use when calling 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  // Prime vn with the last full input vector; each iteration rotates one new
  // lane in via Ext so vn slides over inputs_n.
  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  // Set the destination to zero for tests such as '[r]shrn2'.
  // TODO: Setting the destination to values other than zero
  //       might be a better test for instructions such as sqxtn2
  //       which may leave parts of V registers unchanged.
  __ Movi(vd.V16B(), 0);

  {
    // Each helper call must emit exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vn_helper);
  }
  __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}

// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper,
                              const Tn inputs_n[], unsigned inputs_n_length,
                              const Td expected[], unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  // One destination register per input iteration.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  Test1OpAcrossNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form, vn_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf("  ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Used to pad the table columns up to the hex width of a lane.
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned output_index = (n * vd_lane_count) + lane;

        if (results[output_index] != expected[output_index]) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex+1, padding,
               lane_len_in_hex+1, padding);

        // TODO: In case of an error, all tests print out as many elements as
        //       there are lanes in the output or input vectors. This way
        //       the viewer can read all the values that were needed for the
        //       operation but the output contains also unnecessary values.
        //       These prints can be improved according to the arguments
        //       passed to test functions.
        //       This output for the 'Across' category has the required
        //       modifications.
        for (unsigned lane = 0; lane < vn_lane_count; lane++) {
          unsigned output_index = n * vd_lane_count;
          unsigned input_index_n = (inputs_n_length - vn_lane_count +
                                    n + 1 + lane) % inputs_n_length;

          if (vn_lane_count-1 == lane) {  // Is this the last lane?
            // Print the result element(s) in the last lane only.
            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                   "| 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          } else {
            // Input-only row: the scalar result column is left blank.
            printf(" 0x%0*" PRIx64 " | %.*s| %.*s\n",
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex+1, padding);
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====

// TODO: Iterate over inputs_d once the traces file is split.

// Generates and runs code that applies the two-operand NEON instruction
// emitted by 'helper' to every (n, m) input combination, accumulating onto a
// fixed initial destination (inputs_d) and storing each result vector.
static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
                               uintptr_t inputs_d,
                               uintptr_t inputs_n, unsigned inputs_n_length,
                               uintptr_t inputs_m, unsigned inputs_m_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form,
                               VectorFormat vm_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  // NOTE(review): unlike Test1OpNEON_Helper, these offsets are not scaled by
  // the lane size — confirm that inputs_*_length is expressed in bytes here,
  // or that the caller guarantees byte-sized lanes.
  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Rotate the next n-input lane into the sliding vn window.
  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Rotate the next m-input lane into the sliding vm window.
  __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
                                  vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Start each iteration from the same initial destination value, so
  // accumulating instructions are tested deterministically.
  __ Mov(vres, vd);
  {
    // Each helper call must emit exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vres_helper, vn_helper, vm_helper);
  }
  __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Checker for two-operand NEON instructions: drives Test2OpNEON_Helper over
// every (n, m) input pair, then prints a trace (--sim_test_trace) or compares
// every destination lane against expected[].
template <typename Td, typename Tn, typename Tm>
static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper,
                        const Td inputs_d[],
                        const Tn inputs_n[], unsigned inputs_n_length,
                        const Tm inputs_m[], unsigned inputs_m_length,
                        const Td expected[], unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form,
                        VectorFormat vm_form) {
  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);

  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

  // One destination vector per (n, m) combination.
  const unsigned results_length = inputs_n_length * inputs_m_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();

  Test2OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_d),
                     reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
                     reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form, vn_form, vm_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf("  ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Used to pad the table columns up to the hex width of a lane.
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
        bool error_in_vector = false;

        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                  (m * vd_lane_count) + lane;

          if (results[output_index] != expected[output_index]) {
            error_in_vector = true;
            break;
          }
        }

        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
          printf("%s\n", name);
          printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
                 lane_len_in_hex+1, padding,
                 lane_len_in_hex+1, padding,
                 lane_len_in_hex+1, padding,
                 lane_len_in_hex+1, padding);

          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                    (m * vd_lane_count) + lane;
            // Reconstruct which input lanes were in the sliding windows when
            // this result was produced (see the Ext rotation in the helper).
            unsigned input_index_n = (inputs_n_length - vd_lane_count +
                                      n + 1 + lane) % inputs_n_length;
            unsigned input_index_m = (inputs_m_length - vd_lane_count +
                                      m + 1 + lane) % inputs_m_length;

            // '*' flags the lane(s) that actually mismatched.
            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_d[lane]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_m[input_index_m]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====

// Like Test2OpNEON_Helper, but additionally iterates over the given lane
// indices, storing one result vector per (n, m, index) combination.
static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
                                     uintptr_t inputs_d,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t inputs_m,
                                     unsigned inputs_m_length,
                                     const int indices[],
                                     unsigned indices_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form,
                                     VectorFormat vm_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  // NOTE(review): as in Test2OpNEON_Helper, these offsets are not scaled by
  // the lane size — confirm inputs_*_length units against the callers.
  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Rotate the next n-input lane into the sliding vn window.
  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Rotate the next m-input lane into the sliding vm window.
  __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
                                  vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Start from the same initial destination value for each combination, then
  // emit and store one result per requested lane index.
  __ Mov(vres, vd);
  {
    for (unsigned i = 0; i < indices_length; i++) {
      {
        // Each helper call must emit exactly one instruction.
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
      }
      __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
    }
  }

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}



// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn, typename Tm>
static void TestByElementNEON(const char *name,
                              TestByElementNEONHelper_t helper,
                              const Td inputs_d[],
                              const Tn inputs_n[], unsigned inputs_n_length,
                              const Tm inputs_m[], unsigned inputs_m_length,
                              const int indices[], unsigned indices_length,
                              const Td expected[], unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form,
                              VectorFormat vm_form) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_m_length > 0);
  VIXL_ASSERT(indices_length > 0);

  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

  // One result vector (vd_lane_count lanes) is produced per
  // (n, m, index) combination.
  const unsigned results_length = inputs_n_length * inputs_m_length *
      indices_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();

  TestByElementNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_d),
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(inputs_m),
                           inputs_m_length,
                           indices, indices_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form, vn_form, vm_form);

  if (Test::sim_test_trace()) {
    // Print the results in the form consumed by
    // tools/generate_simulator_traces.py.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results against the recorded trace.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Wide enough for the widest lane (asserted below).
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++) {
        for (unsigned index = 0; index < indices_length; index++, d++) {
          bool error_in_vector = false;

          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index =
                (n * inputs_m_length * indices_length * vd_lane_count) +
                (m * indices_length * vd_lane_count) +
                (index * vd_lane_count) + lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Report at most kErrorReportLimit failing vectors in full.
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex+1, padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_m_length * indices_length * vd_lane_count) +
                  (m * indices_length * vd_lane_count) +
                  (index * vd_lane_count) + lane;
              // Recover the rotated input window the helper used for this
              // iteration (it shifts one lane in per step, starting from the
              // last 16 bytes of the input array).
              unsigned input_index_n = (inputs_n_length - vd_lane_count +
                                        n + 1 + lane) % inputs_n_length;
              unsigned input_index_m = (inputs_m_length - vd_lane_count +
                                        m + 1 + lane) % inputs_m_length;

              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                     "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] != expected[output_index] ?
                         '*' : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_m[input_index_m]),
                     indices[index],
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====


// Emit and run (or trace) code that applies 'helper' to every combination of
// rotated n-input window and immediate from inputs_m, writing each result
// vector to 'results'.
template <typename Tm>
void Test2OpImmNEON_Helper(
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    uintptr_t inputs_n,
    unsigned inputs_n_length,
    const Tm inputs_m[],
    unsigned inputs_m_length,
    uintptr_t results,
    VectorFormat vd_form,
    VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined &&
              vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  // Seed vn with the last 16 bytes of the input; each loop iteration shifts
  // one fresh lane in via Ext below.
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero for tests such as '[r]shrn2'.
  // TODO: Setting the destination to values other than zero might be a better
  // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
  __ Movi(vd.V16B(), 0);

  {
    for (unsigned i = 0; i < inputs_m_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
      }
      __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn, typename Tm>
static void Test2OpImmNEON(
    const char * name,
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    const Tn inputs_n[], unsigned inputs_n_length,
    const Tm inputs_m[], unsigned inputs_m_length,
    const Td expected[], unsigned expected_length,
    VectorFormat vd_form,
    VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One result vector per (n, m) combination.
  const unsigned results_length = inputs_n_length * inputs_m_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  Test2OpImmNEON_Helper(helper,
                        reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
                        inputs_m, inputs_m_length,
                        reinterpret_cast<uintptr_t>(results),
                        vd_form, vn_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results against the recorded trace.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Wide enough for the widest lane (asserted below).
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
        bool error_in_vector = false;

        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                  (m * vd_lane_count) + lane;

          if (results[output_index] != expected[output_index]) {
            error_in_vector = true;
            break;
          }
        }

        // Report at most kErrorReportLimit failing vectors in full.
        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
          printf("%s\n", name);
          printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                 lane_len_in_hex+1, padding,
                 lane_len_in_hex, padding,
                 lane_len_in_hex+1, padding);

          // First lane of the rotated n-window used on this iteration (the
          // helper seeds vn from the last 16 bytes of the input array).
          const unsigned first_index_n =
              inputs_n_length - (16 / vn_lane_bytes) + n + 1;

          for (unsigned lane = 0;
               lane < std::max(vd_lane_count, vn_lane_count);
               lane++) {
            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                    (m * vd_lane_count) + lane;
            unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
            unsigned input_index_m = m;

            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_m[input_index_m]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====


// Emit and run (or trace) code that applies 'helper' to every combination of
// rotated n-input window and (imm1, imm2) pair, accumulating into a copy of
// the initial destination value loaded from 'inputs_d'.
static void TestOpImmOpImmNEON_Helper(
    TestOpImmOpImmVdUpdateNEONHelper_t helper,
    uintptr_t inputs_d,
    const int inputs_imm1[], unsigned inputs_imm1_length,
    uintptr_t inputs_n, unsigned inputs_n_length,
    const int inputs_imm2[], unsigned inputs_imm2_length,
    uintptr_t results,
    VectorFormat vd_form, VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_d_base = x1;
  Register inputs_n_base = x2;
  Register inputs_n_last_vector = x4;
  Register index_n = x6;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(4, vn_bits);
  VRegister vres = VRegister(5, vn_bits);

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(4, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  __ Ldr(vd, MemOperand(inputs_d_base));

  // Seed vn with the last full vector of the input; each loop iteration
  // shifts one fresh lane in via Ext below.
  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  {
    // Each inner iteration emits exactly three instructions (mov, helper
    // call, str); size the emission check accordingly.
    EmissionCheckScope guard(&masm,
        kInstructionSize * inputs_imm1_length * inputs_imm2_length * 3);
    for (unsigned i = 0; i < inputs_imm1_length; i++) {
      for (unsigned j = 0; j < inputs_imm2_length; j++) {
        __ Mov(vres, vd);
        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
        __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
      }
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void TestOpImmOpImmNEON(const char * name,
                               TestOpImmOpImmVdUpdateNEONHelper_t helper,
                               const Td inputs_d[],
                               const int inputs_imm1[],
                               unsigned inputs_imm1_length,
                               const Tn inputs_n[],
                               unsigned inputs_n_length,
                               const int inputs_imm2[],
                               unsigned inputs_imm2_length,
                               const Td expected[],
                               unsigned expected_length,
                               VectorFormat vd_form,
                               VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_imm1_length > 0);
  VIXL_ASSERT(inputs_imm2_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  // One result vector per (n, imm1, imm2) combination.
  const unsigned results_length = inputs_n_length *
      inputs_imm1_length * inputs_imm2_length;

  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  TestOpImmOpImmNEON_Helper(helper,
                            reinterpret_cast<uintptr_t>(inputs_d),
                            inputs_imm1,
                            inputs_imm1_length,
                            reinterpret_cast<uintptr_t>(inputs_n),
                            inputs_n_length,
                            inputs_imm2,
                            inputs_imm2_length,
                            reinterpret_cast<uintptr_t>(results),
                            vd_form, vn_form);

  if (Test::sim_test_trace()) {
    // Print the results in the form consumed by
    // tools/generate_simulator_traces.py.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results against the recorded trace.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned counted_length = 0;
    // Wide enough for the widest lane (asserted below).
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
        for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
          bool error_in_vector = false;

          counted_length++;

          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index =
                (n * inputs_imm1_length *
                 inputs_imm2_length * vd_lane_count) +
                (imm1 * inputs_imm2_length * vd_lane_count) +
                (imm2 * vd_lane_count) + lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Report at most kErrorReportLimit failing vectors in full.
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex, padding,
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex, padding,
                   lane_len_in_hex+1, padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_imm1_length *
                   inputs_imm2_length * vd_lane_count) +
                  (imm1 * inputs_imm2_length * vd_lane_count) +
                  (imm2 * vd_lane_count) + lane;
              // Recover the rotated input window the helper used for this
              // iteration.
              unsigned input_index_n = (inputs_n_length - vd_lane_count +
                                        n + 1 + lane) % inputs_n_length;
              unsigned input_index_imm1 = imm1;
              unsigned input_index_imm2 = imm2;

              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                     "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] !=
                         expected[output_index] ? '*' : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_ASSERT(counted_length == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Floating-point tests. ====


// Standard floating-point test expansion for both double- and single-precision
// operations.
#define STRINGIFY(s) #s

// Invoke the Test<type> driver for one mnemonic/variant pair, wiring up the
// matching kExpected_* / kExpectedCount_* trace tables.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input)             \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),                \
             &MacroAssembler::mnemonic,                                 \
             input, sizeof(input) / sizeof(input[0]),                   \
             kExpected_##mnemonic##_##variant,                          \
             kExpectedCount_##mnemonic##_##variant)

// Expand one mnemonic into a '_d' (double) and a '_s' (single) TEST.
#define DEFINE_TEST_FP(mnemonic, type, input)                           \
  TEST(mnemonic##_d) {                                                  \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);        \
  }                                                                     \
  TEST(mnemonic##_s) {                                                  \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);         \
  }

// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting though an ASSERT and thus leaking
// memory.
DEFINE_TEST_FP(fmadd, 3Op, Basic)
DEFINE_TEST_FP(fmsub, 3Op, Basic)
DEFINE_TEST_FP(fnmadd, 3Op, Basic)
DEFINE_TEST_FP(fnmsub, 3Op, Basic)

DEFINE_TEST_FP(fadd, 2Op, Basic)
DEFINE_TEST_FP(fdiv, 2Op, Basic)
DEFINE_TEST_FP(fmax, 2Op, Basic)
DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP(fmin, 2Op, Basic)
DEFINE_TEST_FP(fminnm, 2Op, Basic)
DEFINE_TEST_FP(fmul, 2Op, Basic)
DEFINE_TEST_FP(fsub, 2Op, Basic)
DEFINE_TEST_FP(fnmul, 2Op, Basic)

DEFINE_TEST_FP(fabs, 1Op, Basic)
DEFINE_TEST_FP(fmov, 1Op, Basic)
DEFINE_TEST_FP(fneg, 1Op, Basic)
DEFINE_TEST_FP(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frinta, 1Op, Conversions)
DEFINE_TEST_FP(frinti, 1Op, Conversions)
DEFINE_TEST_FP(frintm, 1Op, Conversions)
DEFINE_TEST_FP(frintn, 1Op, Conversions)
DEFINE_TEST_FP(frintp, 1Op, Conversions)
DEFINE_TEST_FP(frintx, 1Op, Conversions)
DEFINE_TEST_FP(frintz, 1Op, Conversions)

// Comparisons, with and without the compare-against-zero ('dz'/'sz') forms.
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

// Precision conversions: S<-D ('sd') and D<-S ('ds').
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }

// Expand FP-to-integer conversions for all four register combinations:
// X<-D ('xd'), X<-S ('xs'), W<-D ('wd') and W<-S ('ws').
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)                    \
  TEST(mnemonic##_xd) {                                                 \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);       \
  }                                                                     \
  TEST(mnemonic##_xs) {                                                 \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);        \
  }                                                                     \
  TEST(mnemonic##_wd) {                                                 \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);       \
  }                                                                     \
  TEST(mnemonic##_ws) {                                                 \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);        \
  }

DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)

// TODO: Scvtf-fixed-point
// TODO: Scvtf-integer
// TODO: Ucvtf-fixed-point
// TODO: Ucvtf-integer

// TODO: Fccmp
// TODO: Fcsel


// ==== NEON Tests. ====

// The CALL_TEST_NEON_HELPER_* macros below expand a mnemonic plus vector
// format(s) into a call to the matching Test*NEON driver defined above,
// wiring up the kExpected_NEON_* / kExpectedCount_NEON_* trace tables.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic,                             \
                                  vdform, vnform,                       \
                                  input_n)                              \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),                \
              &MacroAssembler::mnemonic,                                \
              input_n,                                                  \
              (sizeof(input_n) / sizeof(input_n[0])),                   \
              kExpected_NEON_##mnemonic##_##vdform,                     \
              kExpectedCount_NEON_##mnemonic##_##vdform,                \
              kFormat##vdform,                                          \
              kFormat##vnform)

#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic,                       \
                                        vdform, vnform,                 \
                                        input_n)                        \
  Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform)           \
                    "_" STRINGIFY(vnform),                              \
                    &MacroAssembler::mnemonic,                          \
                    input_n,                                            \
                    (sizeof(input_n) / sizeof(input_n[0])),             \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform,    \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, \
                    kFormat##vdform,                                    \
                    kFormat##vnform)

#define CALL_TEST_NEON_HELPER_2Op(mnemonic,                             \
                                  vdform, vnform, vmform,               \
                                  input_d, input_n, input_m)            \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),                \
              &MacroAssembler::mnemonic,                                \
              input_d,                                                  \
              input_n,                                                  \
              (sizeof(input_n) / sizeof(input_n[0])),                   \
              input_m,                                                  \
              (sizeof(input_m) / sizeof(input_m[0])),                   \
              kExpected_NEON_##mnemonic##_##vdform,                     \
              kExpectedCount_NEON_##mnemonic##_##vdform,                \
              kFormat##vdform,                                          \
              kFormat##vnform,                                          \
              kFormat##vmform)

#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic,                          \
                                     vdform, vnform,                    \
                                     input_n, input_m)                  \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM",   \
                 &MacroAssembler::mnemonic,                             \
                 input_n,                                               \
                 (sizeof(input_n) / sizeof(input_n[0])),                \
                 input_m,                                               \
                 (sizeof(input_m) / sizeof(input_m[0])),                \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,         \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,    \
                 kFormat##vdform,                                       \
                 kFormat##vnform)

#define CALL_TEST_NEON_HELPER_ByElement(mnemonic,                       \
                                        vdform, vnform, vmform,         \
                                        input_d, input_n, input_m, indices) \
  TestByElementNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform)           \
                    "_" STRINGIFY(vnform) "_" STRINGIFY(vmform),        \
                    &MacroAssembler::mnemonic,                          \
                    input_d,                                            \
                    input_n,                                            \
                    (sizeof(input_n) / sizeof(input_n[0])),             \
                    input_m,                                            \
                    (sizeof(input_m) / sizeof(input_m[0])),             \
                    indices,                                            \
                    (sizeof(indices) / sizeof(indices[0])),             \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
                    kFormat##vdform,                                    \
                    kFormat##vnform,                                    \
                    kFormat##vmform)

#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                        \
                                         mnemonic,                      \
                                         vdform, vnform,                \
                                         input_d, input_imm1,           \
                                         input_n, input_imm2)           \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),         \
                     helper,                                            \
                     input_d,                                           \
                     input_imm1,                                        \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])),      \
                     input_n,                                           \
                     (sizeof(input_n) / sizeof(input_n[0])),            \
                     input_imm2,                                        \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])),      \
                     kExpected_NEON_##mnemonic##_##vdform,              \
                     kExpectedCount_NEON_##mnemonic##_##vdform,         \
                     kFormat##vdform,                                   \
                     kFormat##vnform)

// "2SAME": destination and source use the same vector format.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input)           \
  CALL_TEST_NEON_HELPER_1Op(mnemonic,                                   \
                            variant, variant,                           \
                            input)

#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)                  \
  TEST(mnemonic##_8B) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);      \
  }                                                                     \
  TEST(mnemonic##_16B) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input);     \
  }

#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)                   \
  TEST(mnemonic##_4H) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input);     \
  }                                                                     \
  TEST(mnemonic##_8H) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input);     \
  }

#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                   \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input);     \
  }                                                                     \
  TEST(mnemonic##_4S) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input);     \
  }

#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)                      \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)                        \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)

#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                    \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)                            \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)

#define DEFINE_TEST_NEON_2SAME(mnemonic, input)                         \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                          \
  TEST(mnemonic##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);     \
  }
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                      \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                         \
  TEST(mnemonic##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);     \
  }

#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                      \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);      \
  }                                                                     \
  TEST(mnemonic##_4S) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);      \
  }                                                                     \
  TEST(mnemonic##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input);     \
  }

#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input)               \
  TEST(mnemonic##_S) {                                                  \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);       \
  }                                                                     \
  TEST(mnemonic##_D) {                                                  \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input);      \
  }

#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)                \
  TEST(mnemonic##_B) {                                                  \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input);       \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)                \
  TEST(mnemonic##_H) {                                                  \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input);      \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)                \
  TEST(mnemonic##_S) {                                                  \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input);      \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)                \
  TEST(mnemonic##_D) {                                                  \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input);      \
  }

#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input)                  \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)                      \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)                      \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)                      \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)

#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input)               \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)                      \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)


// "ACROSS": across-lanes operations (scalar destination, vector source).
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic,                             \
                                  vd_form, vn_form,                     \
                                  input_n)

#define DEFINE_TEST_NEON_ACROSS(mnemonic, input)                        \
  TEST(mnemonic##_B_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_B_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_H_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_H_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
  }

#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input)                   \
  TEST(mnemonic##_H_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_H_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_D_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
  }

#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input)                     \
  TEST(mnemonic##_S_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input);  \
  }

// "2DIFF": destination and source use different vector formats
// (widening/narrowing operations).
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic,                           \
                                    vdform, vnform,                     \
                                    input_n)                            \
  CALL_TEST_NEON_HELPER_1Op(mnemonic,                                   \
                            vdform, vnform,                             \
                            input_n)

#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input)                    \
  TEST(mnemonic##_4H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_8H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_4S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_1D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
  }                                                                     \
  TEST(mnemonic##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
  }

#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input)                  \
  TEST(mnemonic##_8B) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_4H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \
  }                                                                     \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \
  }                                                                     \
  TEST(mnemonic##2_16B) {                                               \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input);\
  }                                                                     \
  TEST(mnemonic##2_8H) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \
  }                                                                     \
  TEST(mnemonic##2_4S) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \
  }

#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input)                 \
  TEST(mnemonic##_4S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \
  }                                                                     \
  TEST(mnemonic##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input);  \
  }                                                                     \
  TEST(mnemonic##2_4S) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input);\
  }                                                                     \
  TEST(mnemonic##2_2D) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \
  }

#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input)               \
  TEST(mnemonic##_4H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input);  \
  }                                                                     \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
  }                                                                     \
  TEST(mnemonic##2_8H) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
  }                                                                     \
  TEST(mnemonic##2_4S) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }

#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input)            \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D,
kInputDouble##input); \ 2880 } \ 2881 TEST(mnemonic##2_4S) { \ 2882 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ 2883 } 2884 2885 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ 2886 TEST(mnemonic##_B) { \ 2887 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ 2888 } \ 2889 TEST(mnemonic##_H) { \ 2890 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ 2891 } \ 2892 TEST(mnemonic##_S) { \ 2893 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ 2894 } 2895 2896 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ 2897 TEST(mnemonic##_S) { \ 2898 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ 2899 } \ 2900 TEST(mnemonic##_D) { \ 2901 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ 2902 } 2903 2904 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) { \ 2905 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 2906 variant, variant, variant, \ 2907 input_d, input_nm, input_nm); \ 2908 } 2909 2910 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 2911 TEST(mnemonic##_8B) { \ 2912 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, \ 2913 kInput8bitsAccDestination, \ 2914 kInput8bits##input); \ 2915 } \ 2916 TEST(mnemonic##_16B) { \ 2917 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, \ 2918 kInput8bitsAccDestination, \ 2919 kInput8bits##input); \ 2920 } \ 2921 2922 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ 2923 TEST(mnemonic##_4H) { \ 2924 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, \ 2925 kInput16bitsAccDestination, \ 2926 kInput16bits##input); \ 2927 } \ 2928 TEST(mnemonic##_8H) { \ 2929 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, \ 2930 kInput16bitsAccDestination, \ 2931 kInput16bits##input); \ 2932 } \ 2933 TEST(mnemonic##_2S) { \ 2934 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \ 2935 kInput32bitsAccDestination, \ 2936 kInput32bits##input); \ 2937 } \ 2938 TEST(mnemonic##_4S) { \ 2939 
CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \ 2940 kInput32bitsAccDestination, \ 2941 kInput32bits##input); \ 2942 } 2943 2944 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 2945 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 2946 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) 2947 2948 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ 2949 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 2950 TEST(mnemonic##_2D) { \ 2951 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \ 2952 kInput64bitsAccDestination, \ 2953 kInput64bits##input); \ 2954 } 2955 2956 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ 2957 TEST(mnemonic##_2S) { \ 2958 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \ 2959 kInputFloatAccDestination, \ 2960 kInputFloat##input); \ 2961 } \ 2962 TEST(mnemonic##_4S) { \ 2963 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \ 2964 kInputFloatAccDestination, \ 2965 kInputFloat##input); \ 2966 } \ 2967 TEST(mnemonic##_2D) { \ 2968 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \ 2969 kInputDoubleAccDestination, \ 2970 kInputDouble##input); \ 2971 } 2972 2973 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ 2974 TEST(mnemonic##_D) { \ 2975 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \ 2976 kInput64bitsAccDestination, \ 2977 kInput64bits##input); \ 2978 } 2979 2980 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ 2981 TEST(mnemonic##_H) { \ 2982 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \ 2983 kInput16bitsAccDestination, \ 2984 kInput16bits##input); \ 2985 } \ 2986 TEST(mnemonic##_S) { \ 2987 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \ 2988 kInput32bitsAccDestination, \ 2989 kInput32bits##input); \ 2990 } \ 2991 2992 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ 2993 TEST(mnemonic##_B) { \ 2994 CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, \ 2995 kInput8bitsAccDestination, \ 2996 kInput8bits##input); \ 2997 } \ 2998 TEST(mnemonic##_H) { \ 2999 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \ 3000 kInput16bitsAccDestination, \ 3001 kInput16bits##input); \ 3002 } \ 
  TEST(mnemonic##_S) {                                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, S,                               \
                                kInput32bitsAccDestination,                \
                                kInput32bits##input);                      \
  }                                                                        \
  TEST(mnemonic##_D) {                                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, D,                               \
                                kInput64bitsAccDestination,                \
                                kInput64bits##input);                      \
  }

// Floating-point scalar three-same: S (single) and D (double) operands.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input)                  \
  TEST(mnemonic##_S) {                                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, S,                               \
                                kInputFloatAccDestination,                 \
                                kInputFloat##input);                       \
  }                                                                        \
  TEST(mnemonic##_D) {                                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, D,                               \
                                kInputDoubleAccDestination,                \
                                kInputDouble##input);                      \
  }

// "Three different": the three registers may use different arrangements.
#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                              \
                                    vdform, vnform, vmform,                \
                                    input_d, input_n, input_m) {           \
    CALL_TEST_NEON_HELPER_2Op(mnemonic,                                    \
                              vdform, vnform, vmform,                      \
                              input_d, input_n, input_m);                  \
  }

// Long (widening) three-different forms with an 8H destination only.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)                    \
  TEST(mnemonic##_8H) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B,                      \
                                kInput16bitsAccDestination,                \
                                kInput8bits##input, kInput8bits##input);   \
  }                                                                        \
  TEST(mnemonic##2_8H) {                                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B,                 \
                                kInput16bitsAccDestination,                \
                                kInput8bits##input, kInput8bits##input);   \
  }

// Long three-different forms with a 4S destination only.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)                    \
  TEST(mnemonic##_4S) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H,                      \
                                kInput32bitsAccDestination,                \
                                kInput16bits##input, kInput16bits##input); \
  }                                                                        \
  TEST(mnemonic##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H,                   \
                                kInput32bitsAccDestination,                \
                                kInput16bits##input, kInput16bits##input); \
  }

// Long three-different forms with a 2D destination only.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)                    \
  TEST(mnemonic##_2D) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S,                      \
                                kInput64bitsAccDestination,                \
                                kInput32bits##input, kInput32bits##input); \
  }                                                                        \
  TEST(mnemonic##2_2D) {                                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S,                   \
                                kInput64bitsAccDestination,                \
                                kInput32bits##input, kInput32bits##input); \
  }

// 4S and 2D destinations (e.g. sqdmlal, which has no byte form).
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)                    \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)                          \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)

// All long destinations: 8H, 4S and 2D.
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input)                       \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)                          \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)                          \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)

// Scalar long (widening) form: S destination from H sources.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input)              \
  TEST(mnemonic##_S) {                                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H,                         \
                                kInput32bitsAccDestination,                \
                                kInput16bits##input,                       \
                                kInput16bits##input);                      \
  }

// Scalar long form: D destination from S sources.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)              \
  TEST(mnemonic##_D) {                                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S,                         \
                                kInput64bitsAccDestination,                \
                                kInput32bits##input,                       \
                                kInput32bits##input);                      \
  }

// Both scalar long destinations.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input)             \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input)                    \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)

// Wide forms: wide destination and first source, narrow second source.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input)                       \
  TEST(mnemonic##_8H) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B,                      \
                                kInput16bitsAccDestination,                \
                                kInput16bits##input, kInput8bits##input);  \
  }                                                                        \
  TEST(mnemonic##_4S) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H,                      \
                                kInput32bitsAccDestination,                \
                                kInput32bits##input, kInput16bits##input); \
  }                                                                        \
  TEST(mnemonic##_2D) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S,                      \
                                kInput64bitsAccDestination,                \
                                kInput64bits##input, kInput32bits##input); \
  }                                                                        \
  TEST(mnemonic##2_8H) {                                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B,                  \
                                kInput16bitsAccDestination,                \
                                kInput16bits##input, kInput8bits##input);  \
  }                                                                        \
  TEST(mnemonic##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H,                   \
                                kInput32bitsAccDestination,                \
                                kInput32bits##input, kInput16bits##input); \
  }                                                                        \
  TEST(mnemonic##2_2D) {                                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S,                   \
                                kInput64bitsAccDestination,                \
                                kInput64bits##input, kInput32bits##input); \
  }

// Narrowing three-different forms (e.g. addhn): narrow destination, wide
// sources; the "2" forms write the upper destination half.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input)                     \
  TEST(mnemonic##_8B) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H,                      \
                                kInput8bitsAccDestination,                 \
                                kInput16bits##input, kInput16bits##input); \
  }                                                                        \
  TEST(mnemonic##_4H) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S,                      \
                                kInput16bitsAccDestination,                \
                                kInput32bits##input, kInput32bits##input); \
  }                                                                        \
  TEST(mnemonic##_2S) {                                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D,                      \
                                kInput32bitsAccDestination,                \
                                kInput64bits##input, kInput64bits##input); \
  }                                                                        \
  TEST(mnemonic##2_16B) {                                                  \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H,                  \
                                kInput8bitsAccDestination,                 \
                                kInput16bits##input, kInput16bits##input); \
  }                                                                        \
  TEST(mnemonic##2_8H) {                                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S,                   \
                                kInput16bitsAccDestination,                \
                                kInput32bits##input, kInput32bits##input); \
  }                                                                        \
  TEST(mnemonic##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D,                   \
                                kInput32bitsAccDestination,                \
                                kInput64bits##input, kInput64bits##input); \
  }

// One register plus an immediate (shift amounts, lane indices, ...).
#define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
                                     vdform, vnform,                       \
                                     input_n,                              \
                                     input_imm) {                          \
    CALL_TEST_NEON_HELPER_2OpImm(mnemonic,                                 \
                                 vdform, vnform,                           \
                                 input_n, input_imm);                      \
  }

// Same-arrangement immediate forms for every integer arrangement.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm)                \
  TEST(mnemonic##_8B_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 8B, 8B,                                   \
                                 kInput8bits##input,                       \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##_16B_2OPIMM) {                                            \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 16B, 16B,                                 \
                                 kInput8bits##input,                       \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##_4H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 4H, 4H,                                   \
                                 kInput16bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_8H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 8H, 8H,                                   \
                                 kInput16bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_2S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 2S, 2S,                                   \
                                 kInput32bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_4S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 4S, 4S,                                   \
                                 kInput32bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_2D_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 2D, 2D,                                   \
                                 kInput64bits##input,                      \
                                 kInput64bitsImm##input_imm);              \
  }

// Copy-style forms (dup): vector destination, scalar source lane selected
// by the immediate.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm)           \
  TEST(mnemonic##_8B_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 8B, B,                                    \
                                 kInput8bits##input,                       \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##_16B_2OPIMM) {                                            \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 16B, B,                                   \
                                 kInput8bits##input,                       \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##_4H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 4H, H,                                    \
                                 kInput16bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_8H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 8H, H,                                    \
                                 kInput16bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_2S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 2S, S,                                    \
                                 kInput32bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_4S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 4S, S,                                    \
                                 kInput32bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_2D_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 2D, D,                                    \
                                 kInput64bits##input,                      \
                                 kInput64bitsImm##input_imm);              \
  }

// Narrowing shift-by-immediate forms (e.g. shrn).
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm)         \
  TEST(mnemonic##_8B_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 8B, 8H,                                   \
                                 kInput16bits##input,                      \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##_4H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 4H, 4S,                                   \
                                 kInput32bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_2S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 2S, 2D,                                   \
                                 kInput64bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##2_16B_2OPIMM) {                                           \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                              \
                                 16B, 8H,                                  \
                                 kInput16bits##input,                      \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##2_8H_2OPIMM) {                                            \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                              \
                                 8H, 4S,                                   \
                                 kInput32bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##2_4S_2OPIMM) {                                            \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                              \
                                 4S, 2D,                                   \
                                 kInput64bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }

// Scalar narrowing shift-by-immediate forms.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm)  \
  TEST(mnemonic##_B_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 B, H,                                     \
                                 kInput16bits##input,                      \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##_H_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 H, S,                                     \
                                 kInput32bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_S_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 S, D,                                     \
3314 kInput64bits##input, \ 3315 kInput32bitsImm##input_imm); \ 3316 } 3317 3318 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ 3319 TEST(mnemonic##_2S_2OPIMM) { \ 3320 CALL_TEST_NEON_HELPER_2OPIMM( \ 3321 mnemonic, \ 3322 2S, 2S, \ 3323 kInputFloat##Basic, \ 3324 kInputDoubleImm##input_imm) \ 3325 } \ 3326 TEST(mnemonic##_4S_2OPIMM) { \ 3327 CALL_TEST_NEON_HELPER_2OPIMM( \ 3328 mnemonic, \ 3329 4S, 4S, \ 3330 kInputFloat##input, \ 3331 kInputDoubleImm##input_imm); \ 3332 } \ 3333 TEST(mnemonic##_2D_2OPIMM) { \ 3334 CALL_TEST_NEON_HELPER_2OPIMM( \ 3335 mnemonic, \ 3336 2D, 2D, \ 3337 kInputDouble##input, \ 3338 kInputDoubleImm##input_imm); \ 3339 } 3340 3341 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ 3342 TEST(mnemonic##_2S_2OPIMM) { \ 3343 CALL_TEST_NEON_HELPER_2OPIMM( \ 3344 mnemonic, \ 3345 2S, 2S, \ 3346 kInputFloat##Basic, \ 3347 kInput32bitsImm##input_imm) \ 3348 } \ 3349 TEST(mnemonic##_4S_2OPIMM) { \ 3350 CALL_TEST_NEON_HELPER_2OPIMM( \ 3351 mnemonic, \ 3352 4S, 4S, \ 3353 kInputFloat##input, \ 3354 kInput32bitsImm##input_imm) \ 3355 } \ 3356 TEST(mnemonic##_2D_2OPIMM) { \ 3357 CALL_TEST_NEON_HELPER_2OPIMM( \ 3358 mnemonic, \ 3359 2D, 2D, \ 3360 kInputDouble##input, \ 3361 kInput64bitsImm##input_imm) \ 3362 } 3363 3364 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ 3365 TEST(mnemonic##_S_2OPIMM) { \ 3366 CALL_TEST_NEON_HELPER_2OPIMM( \ 3367 mnemonic, \ 3368 S, S, \ 3369 kInputFloat##Basic, \ 3370 kInput32bitsImm##input_imm) \ 3371 } \ 3372 TEST(mnemonic##_D_2OPIMM) { \ 3373 CALL_TEST_NEON_HELPER_2OPIMM( \ 3374 mnemonic, \ 3375 D, D, \ 3376 kInputDouble##input, \ 3377 kInput64bitsImm##input_imm) \ 3378 } 3379 3380 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \ 3381 TEST(mnemonic##_2S_2OPIMM) { \ 3382 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3383 2S, 2S, \ 3384 kInput32bits##input, \ 3385 kInput32bitsImm##input_imm); \ 3386 } \ 3387 TEST(mnemonic##_4S_2OPIMM) { \ 3388 
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 4S, 4S,                                   \
                                 kInput32bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_2D_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 2D, 2D,                                   \
                                 kInput64bits##input,                      \
                                 kInput64bitsImm##input_imm);              \
  }

// Scalar immediate forms that only exist with a D-sized operand.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)       \
  TEST(mnemonic##_D_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 D, D,                                     \
                                 kInput64bits##input,                      \
                                 kInput64bitsImm##input_imm);              \
  }

// Scalar immediate forms for S and D operand sizes.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)      \
  TEST(mnemonic##_S_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 S, S,                                     \
                                 kInput32bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }                                                                        \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)

// Scalar FP immediate form, D-sized operand only.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)    \
  TEST(mnemonic##_D_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 D, D,                                     \
                                 kInputDouble##input,                      \
                                 kInputDoubleImm##input_imm);              \
  }

// Scalar FP immediate forms for S and D operand sizes.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm)   \
  TEST(mnemonic##_S_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 S, S,                                     \
                                 kInputFloat##input,                       \
                                 kInputDoubleImm##input_imm);              \
  }                                                                        \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)

// Scalar immediate forms for all of B, H, S and D operand sizes.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm)         \
  TEST(mnemonic##_B_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 B, B,                                     \
                                 kInput8bits##input,                       \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##_H_2OPIMM) {                                              \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 H, H,                                     \
                                 kInput16bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)

// Long shift-by-immediate forms (e.g. sshll): widened destination lanes;
// the "2" forms read the upper source half.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm)           \
  TEST(mnemonic##_8H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 8H, 8B,                                   \
                                 kInput8bits##input,                       \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##_4S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 4S, 4H,                                   \
                                 kInput16bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##_2D_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                 \
                                 2D, 2S,                                   \
                                 kInput32bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##2_8H_2OPIMM) {                                            \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                              \
                                 8H, 16B,                                  \
                                 kInput8bits##input,                       \
                                 kInput8bitsImm##input_imm);               \
  }                                                                        \
  TEST(mnemonic##2_4S_2OPIMM) {                                            \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                              \
                                 4S, 8H,                                   \
                                 kInput16bits##input,                      \
                                 kInput16bitsImm##input_imm);              \
  }                                                                        \
  TEST(mnemonic##2_2D_2OPIMM) {                                            \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                              \
                                 2D, 4S,                                   \
                                 kInput32bits##input,                      \
                                 kInput32bitsImm##input_imm);              \
  }

// By-element forms: the m operand is a single lane picked from a list of
// indices; every (input, index) pair is exercised.
#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                          \
                                        vdform, vnform, vmform,            \
                                        input_d, input_n,                  \
                                        input_m, indices) {                \
    CALL_TEST_NEON_HELPER_ByElement(mnemonic,                              \
                                    vdform, vnform, vmform,                \
                                    input_d, input_n,                      \
                                    input_m, indices);                     \
  }

// Same-size by-element forms for H and S lanes.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m)    \
  TEST(mnemonic##_4H_4H_H) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    4H, 4H, H,                             \
                                    kInput16bits##input_d,                 \
                                    kInput16bits##input_n,                 \
                                    kInput16bits##input_m,                 \
                                    kInputHIndices);                       \
  }                                                                        \
  TEST(mnemonic##_8H_8H_H) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    8H, 8H, H,                             \
                                    kInput16bits##input_d,                 \
                                    kInput16bits##input_n,                 \
                                    kInput16bits##input_m,                 \
                                    kInputHIndices);                       \
  }                                                                        \
  TEST(mnemonic##_2S_2S_S) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    2S, 2S, S,                             \
                                    kInput32bits##input_d,                 \
                                    kInput32bits##input_n,                 \
3519 kInput32bits##input_m, \ 3520 kInputSIndices); \ 3521 } \ 3522 TEST(mnemonic##_4S_4S_S) { \ 3523 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3524 4S, 4S, S, \ 3525 kInput32bits##input_d, \ 3526 kInput32bits##input_n, \ 3527 kInput32bits##input_m, \ 3528 kInputSIndices); \ 3529 } 3530 3531 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, \ 3532 input_d, input_n, input_m) \ 3533 TEST(mnemonic##_H_H_H) { \ 3534 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3535 H, H, H, \ 3536 kInput16bits##input_d, \ 3537 kInput16bits##input_n, \ 3538 kInput16bits##input_m, \ 3539 kInputHIndices); \ 3540 } \ 3541 TEST(mnemonic##_S_S_S) { \ 3542 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3543 S, S, S, \ 3544 kInput32bits##input_d, \ 3545 kInput32bits##input_n, \ 3546 kInput32bits##input_m, \ 3547 kInputSIndices); \ 3548 } 3549 3550 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ 3551 TEST(mnemonic##_2S_2S_S) { \ 3552 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3553 2S, 2S, S, \ 3554 kInputFloat##input_d, \ 3555 kInputFloat##input_n, \ 3556 kInputFloat##input_m, \ 3557 kInputSIndices); \ 3558 } \ 3559 TEST(mnemonic##_4S_4S_S) { \ 3560 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3561 4S, 4S, S, \ 3562 kInputFloat##input_d, \ 3563 kInputFloat##input_n, \ 3564 kInputFloat##input_m, \ 3565 kInputSIndices); \ 3566 } \ 3567 TEST(mnemonic##_2D_2D_D) { \ 3568 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3569 2D, 2D, D, \ 3570 kInputDouble##input_d, \ 3571 kInputDouble##input_n, \ 3572 kInputDouble##input_m, \ 3573 kInputDIndices); \ 3574 } \ 3575 3576 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ 3577 TEST(mnemonic##_S_S_S) { \ 3578 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3579 S, S, S, \ 3580 kInputFloat##inp_d, \ 3581 kInputFloat##inp_n, \ 3582 kInputFloat##inp_m, \ 3583 kInputSIndices); \ 3584 } \ 3585 TEST(mnemonic##_D_D_D) { \ 3586 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3587 D, D, D, \ 3588 kInputDouble##inp_d, \ 
3589 kInputDouble##inp_n, \ 3590 kInputDouble##inp_m, \ 3591 kInputDIndices); \ 3592 } \ 3593 3594 3595 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ 3596 TEST(mnemonic##_4S_4H_H) { \ 3597 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3598 4S, 4H, H, \ 3599 kInput32bits##input_d, \ 3600 kInput16bits##input_n, \ 3601 kInput16bits##input_m, \ 3602 kInputHIndices); \ 3603 } \ 3604 TEST(mnemonic##2_4S_8H_H) { \ 3605 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 3606 4S, 8H, H, \ 3607 kInput32bits##input_d, \ 3608 kInput16bits##input_n, \ 3609 kInput16bits##input_m, \ 3610 kInputHIndices); \ 3611 } \ 3612 TEST(mnemonic##_2D_2S_S) { \ 3613 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3614 2D, 2S, S, \ 3615 kInput64bits##input_d, \ 3616 kInput32bits##input_n, \ 3617 kInput32bits##input_m, \ 3618 kInputSIndices); \ 3619 } \ 3620 TEST(mnemonic##2_2D_4S_S) { \ 3621 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 3622 2D, 4S, S, \ 3623 kInput64bits##input_d, \ 3624 kInput32bits##input_n, \ 3625 kInput32bits##input_m, \ 3626 kInputSIndices); \ 3627 } 3628 3629 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, \ 3630 input_d, input_n, input_m) \ 3631 TEST(mnemonic##_S_H_H) { \ 3632 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3633 S, H, H, \ 3634 kInput32bits##input_d, \ 3635 kInput16bits##input_n, \ 3636 kInput16bits##input_m, \ 3637 kInputHIndices); \ 3638 } \ 3639 TEST(mnemonic##_D_S_S) { \ 3640 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3641 D, S, S, \ 3642 kInput64bits##input_d, \ 3643 kInput32bits##input_n, \ 3644 kInput32bits##input_m, \ 3645 kInputSIndices); \ 3646 } 3647 3648 3649 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3650 variant, \ 3651 input_d, \ 3652 input_imm1, \ 3653 input_n, \ 3654 input_imm2) { \ 3655 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \ 3656 mnemonic, \ 3657 variant, variant, \ 3658 input_d, input_imm1, \ 3659 input_n, input_imm2); \ 3660 } 3661 3662 #define 
DEFINE_TEST_NEON_2OP2IMM(mnemonic, \ 3663 input_d, input_imm1, \ 3664 input_n, input_imm2) \ 3665 TEST(mnemonic##_B) { \ 3666 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3667 16B, \ 3668 kInput8bits##input_d, \ 3669 kInput8bitsImm##input_imm1, \ 3670 kInput8bits##input_n, \ 3671 kInput8bitsImm##input_imm2); \ 3672 } \ 3673 TEST(mnemonic##_H) { \ 3674 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3675 8H, \ 3676 kInput16bits##input_d, \ 3677 kInput16bitsImm##input_imm1, \ 3678 kInput16bits##input_n, \ 3679 kInput16bitsImm##input_imm2); \ 3680 } \ 3681 TEST(mnemonic##_S) { \ 3682 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3683 4S, \ 3684 kInput32bits##input_d, \ 3685 kInput32bitsImm##input_imm1, \ 3686 kInput32bits##input_n, \ 3687 kInput32bitsImm##input_imm2); \ 3688 } \ 3689 TEST(mnemonic##_D) { \ 3690 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3691 2D, \ 3692 kInput64bits##input_d, \ 3693 kInput64bitsImm##input_imm1, \ 3694 kInput64bits##input_n, \ 3695 kInput64bitsImm##input_imm2); \ 3696 } 3697 3698 3699 // Advanced SIMD copy. 3700 DEFINE_TEST_NEON_2OP2IMM(ins, 3701 Basic, LaneCountFromZero, 3702 Basic, LaneCountFromZero) 3703 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) 3704 3705 3706 // Advanced SIMD scalar copy. 3707 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero) 3708 3709 3710 // Advanced SIMD three same. 
3711 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic) 3712 DEFINE_TEST_NEON_3SAME(sqadd, Basic) 3713 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic) 3714 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic) 3715 DEFINE_TEST_NEON_3SAME(sqsub, Basic) 3716 DEFINE_TEST_NEON_3SAME(cmgt, Basic) 3717 DEFINE_TEST_NEON_3SAME(cmge, Basic) 3718 DEFINE_TEST_NEON_3SAME(sshl, Basic) 3719 DEFINE_TEST_NEON_3SAME(sqshl, Basic) 3720 DEFINE_TEST_NEON_3SAME(srshl, Basic) 3721 DEFINE_TEST_NEON_3SAME(sqrshl, Basic) 3722 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic) 3723 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic) 3724 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic) 3725 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic) 3726 DEFINE_TEST_NEON_3SAME(add, Basic) 3727 DEFINE_TEST_NEON_3SAME(cmtst, Basic) 3728 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic) 3729 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic) 3730 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic) 3731 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic) 3732 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic) 3733 DEFINE_TEST_NEON_3SAME(addp, Basic) 3734 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic) 3735 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic) 3736 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic) 3737 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic) 3738 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic) 3739 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic) 3740 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic) 3741 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic) 3742 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic) 3743 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic) 3744 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic) 3745 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic) 3746 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic) 3747 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic) 3748 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic) 3749 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic) 3750 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic) 3751 DEFINE_TEST_NEON_3SAME(uqadd, Basic) 3752 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic) 3753 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic) 3754 DEFINE_TEST_NEON_3SAME(uqsub, Basic) 3755 
DEFINE_TEST_NEON_3SAME(cmhi, Basic) 3756 DEFINE_TEST_NEON_3SAME(cmhs, Basic) 3757 DEFINE_TEST_NEON_3SAME(ushl, Basic) 3758 DEFINE_TEST_NEON_3SAME(uqshl, Basic) 3759 DEFINE_TEST_NEON_3SAME(urshl, Basic) 3760 DEFINE_TEST_NEON_3SAME(uqrshl, Basic) 3761 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic) 3762 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic) 3763 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic) 3764 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic) 3765 DEFINE_TEST_NEON_3SAME(sub, Basic) 3766 DEFINE_TEST_NEON_3SAME(cmeq, Basic) 3767 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic) 3768 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic) 3769 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic) 3770 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic) 3771 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic) 3772 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic) 3773 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic) 3774 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic) 3775 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic) 3776 DEFINE_TEST_NEON_3SAME_FP(facge, Basic) 3777 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic) 3778 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic) 3779 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic) 3780 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic) 3781 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic) 3782 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic) 3783 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic) 3784 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic) 3785 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic) 3786 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic) 3787 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic) 3788 3789 3790 // Advanced SIMD scalar three same. 
3791 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic) 3792 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic) 3793 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic) 3794 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic) 3795 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic) 3796 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic) 3797 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic) 3798 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic) 3799 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic) 3800 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic) 3801 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic) 3802 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic) 3803 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic) 3804 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic) 3805 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic) 3806 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic) 3807 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic) 3808 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic) 3809 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic) 3810 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic) 3811 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic) 3812 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic) 3813 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic) 3814 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic) 3815 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic) 3816 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic) 3817 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic) 3818 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic) 3819 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic) 3820 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic) 3821 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic) 3822 3823 3824 // Advanced SIMD three different. 
// Trace tests for the "Advanced SIMD three different" group (operands and
// result use different element widths): _LONG widens the result, _WIDE widens
// one operand, _NARROW narrows the result.
DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)


// Advanced SIMD scalar three different.
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)


// Advanced SIMD scalar pairwise.
// addp has only one scalar form (D destination, 2D source), so it is written
// out directly rather than through a DEFINE_TEST_NEON_* macro.
TEST(addp_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
}
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)


// Advanced SIMD shift by immediate.
// Trace tests for the "Advanced SIMD shift by immediate" group. The third
// macro argument names the immediate-value input list: TypeWidth,
// TypeWidthFromZero, or TypeWidthFromZeroToWidth (valid shift-amount ranges
// differ per instruction). scvtf/ucvtf/fcvtzs/fcvtzu here are the
// fixed-point-conversion forms; with zero fractional bits they also cover the
// plain integer-conversion encodings (noted again in the "two-register
// miscellaneous" section below).
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, \
                           TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, \
                           TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)


// Advanced SIMD scalar shift by immediate.
// Trace tests for the "Advanced SIMD scalar shift by immediate" group —
// scalar counterparts of the vector shift-by-immediate tests above, with the
// same immediate input-list naming (TypeWidth / TypeWidthFromZero /
// TypeWidthFromZeroToWidth).
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, \
                                  TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, \
                                  TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)


// Advanced SIMD two-register miscellaneous.
// Trace tests for the "Advanced SIMD two-register miscellaneous" group
// (signed/first half). Compare-against-zero forms reuse the 2OPIMM helpers
// with the "Zero" immediate list; the integer<->FP conversion forms with an
// implicit fbits of 0 are covered by the fixed-point tests in the
// shift-by-immediate section (see the inline comments below).
DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
DEFINE_TEST_NEON_2SAME(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME(abs, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
// "Advanced SIMD two-register miscellaneous", unsigned/second half.
// not_ carries a trailing underscore because `not` is a reserved alternative
// token in C++ and cannot be used as a macro argument identifier directly.
DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
DEFINE_TEST_NEON_2SAME(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME(neg, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)


// Advanced SIMD scalar two-register miscellaneous.
// Trace tests for the "Advanced SIMD scalar two-register miscellaneous"
// group (first half). As in the vector section, the integer-conversion forms
// are covered by the fixed-point fbits-0 tests (see inline comments).
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// Scalar fcvtxn has only the S <- D form, so it is written out directly
// rather than through a DEFINE_TEST_NEON_* macro.
TEST(fcvtxn_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
}
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
// Remainder of the scalar two-register miscellaneous tests.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)


// Advanced SIMD across lanes.
DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
DEFINE_TEST_NEON_ACROSS(sminv, Basic)
DEFINE_TEST_NEON_ACROSS(addv, Basic)
DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
DEFINE_TEST_NEON_ACROSS(uminv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)


// Advanced SIMD permute.
// Permutes take two register operands of the same arrangement, so they reuse
// the three-same test helper.
DEFINE_TEST_NEON_3SAME(uzp1, Basic)
DEFINE_TEST_NEON_3SAME(trn1, Basic)
DEFINE_TEST_NEON_3SAME(zip1, Basic)
DEFINE_TEST_NEON_3SAME(uzp2, Basic)
DEFINE_TEST_NEON_3SAME(trn2, Basic)
DEFINE_TEST_NEON_3SAME(zip2, Basic)


// Advanced SIMD vector x indexed element.
// Trace tests for the by-element (vector x indexed element) groups. The
// three "Basic" arguments presumably name the input lists for the two
// register operands and the index — confirm against the macro definitions
// earlier in this translation unit.
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)


// Advanced SIMD scalar x indexed element.
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)

}  // namespace vixl