Home | History | Annotate | Download | only in test
      1 // Copyright 2015, ARM Limited
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #include <stdio.h>
     28 #include <float.h>
     29 
     30 #include "test-runner.h"
     31 #include "test-utils-a64.h"
     32 #include "test-simulator-inputs-a64.h"
     33 #include "test-simulator-traces-a64.h"
     34 #include "vixl/a64/macro-assembler-a64.h"
     35 #include "vixl/a64/simulator-a64.h"
     36 
     37 namespace vixl {
     38 
     39 // ==== Simulator Tests ====
     40 //
     41 // These simulator tests check instruction behaviour against a trace taken from
     42 // real AArch64 hardware. The same test code is used to generate the trace; the
     43 // results are printed to stdout when the test is run with --sim_test_trace.
     44 //
     45 // The input lists and expected results are stored in test/traces. The expected
     46 // results can be regenerated using tools/generate_simulator_traces.py. Adding
     47 // a test for a new instruction is described at the top of
     48 // test-simulator-traces-a64.h.
     49 
     50 #define __ masm.
     51 #define TEST(name)  TEST_(SIM_##name)
     52 
     53 #define BUF_SIZE (256)
     54 
     55 #ifdef VIXL_INCLUDE_SIMULATOR
     56 
     57 #define SETUP()                                                               \
     58   MacroAssembler masm(BUF_SIZE);                                              \
     59   Decoder decoder;                                                            \
     60   Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder)        \
     61                                               : new Simulator(&decoder);      \
     62   simulator->set_coloured_trace(Test::coloured_trace());                      \
     63   simulator->set_instruction_stats(Test::instruction_stats());                \
     64 
     65 #define START()                                                               \
     66   masm.Reset();                                                               \
     67   simulator->ResetState();                                                    \
     68   __ PushCalleeSavedRegisters();                                              \
     69   if (Test::trace_reg()) {                                                    \
     70     __ Trace(LOG_STATE, TRACE_ENABLE);                                        \
     71   }                                                                           \
     72   if (Test::trace_write()) {                                                  \
     73     __ Trace(LOG_WRITE, TRACE_ENABLE);                                        \
     74   }                                                                           \
     75   if (Test::trace_sim()) {                                                    \
     76     __ Trace(LOG_DISASM, TRACE_ENABLE);                                       \
     77   }                                                                           \
     78   if (Test::instruction_stats()) {                                            \
     79     __ EnableInstrumentation();                                               \
     80   }
     81 
     82 #define END()                                                                 \
     83   if (Test::instruction_stats()) {                                            \
     84     __ DisableInstrumentation();                                              \
     85   }                                                                           \
     86   __ Trace(LOG_ALL, TRACE_DISABLE);                                           \
     87   __ PopCalleeSavedRegisters();                                               \
     88   __ Ret();                                                                   \
     89   masm.FinalizeCode()
     90 
     91 #define RUN()                                                                 \
     92   simulator->RunFrom(masm.GetStartAddress<Instruction*>())
     93 
     94 #define TEARDOWN()                                                            \
     95   delete simulator;
     96 
     97 #else     // VIXL_INCLUDE_SIMULATOR
     98 
     99 #define SETUP()                                                               \
    100   MacroAssembler masm(BUF_SIZE);                                              \
    101   CPU::SetUp()
    102 
    103 #define START()                                                               \
    104   masm.Reset();                                                               \
    105   __ PushCalleeSavedRegisters()
    106 
    107 #define END()                                                                 \
    108   __ PopCalleeSavedRegisters();                                               \
    109   __ Ret();                                                                   \
    110   masm.FinalizeCode()
    111 
    112 #define RUN()                                                                  \
    113   {                                                                            \
    114     byte* buffer_start = masm.GetStartAddress<byte*>();                        \
    115     size_t buffer_length = masm.CursorOffset();                                \
    116     void (*test_function)(void);                                               \
    117                                                                                \
    118     CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length);               \
    119     VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function));         \
    120     memcpy(&test_function, &buffer_start, sizeof(buffer_start));               \
    121     test_function();                                                           \
    122   }
    123 
    124 #define TEARDOWN()
    125 
    126 #endif    // VIXL_INCLUDE_SIMULATOR
    127 
    128 
    129 // The maximum number of errors to report in detail for each test.
    130 static const unsigned kErrorReportLimit = 8;
    131 
    132 
    133 // Overloaded versions of rawbits_to_double and rawbits_to_float for use in the
    134 // templated test functions.
    135 static float rawbits_to_fp(uint32_t bits) {
    136   return rawbits_to_float(bits);
    137 }
    138 
    139 static double rawbits_to_fp(uint64_t bits) {
    140   return rawbits_to_double(bits);
    141 }
    142 
    143 
    144 // MacroAssembler member function pointers to pass to the test dispatchers.
    145 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
    146                                                   const FPRegister& fn);
    147 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
    148                                                   const FPRegister& fn,
    149                                                   const FPRegister& fm);
    150 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
    151                                                   const FPRegister& fn,
    152                                                   const FPRegister& fm,
    153                                                   const FPRegister& fa);
    154 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
    155                                                   const FPRegister& fm);
    156 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
    157                                                       double value);
    158 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
    159                                                     const FPRegister& fn);
    160 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
    161                                                       const FPRegister& fn,
    162                                                       int fbits);
    163 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
    164                                                       const Register& rn,
    165                                                       int fbits);
    166 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
    167 //       consolidated into one routine.
    168 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(
    169   const VRegister& vd, const VRegister& vn);
    170 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(
    171   const VRegister& vd, const VRegister& vn, const VRegister& vm);
    172 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(
    173   const VRegister& vd, const VRegister& vn, const VRegister& vm, int vm_index);
    174 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    175   const VRegister& vd, int imm1, const VRegister& vn, int imm2);
    176 
    177 // This helps using the same typename for both the function pointer
    178 // and the array of immediates passed to helper routines.
    179 template <typename T>
    180 class Test2OpImmediateNEONHelper_t {
    181  public:
    182     typedef void (MacroAssembler::*mnemonic)(
    183       const VRegister& vd, const VRegister& vn, T imm);
    184 };
    185 
    186 
    187 // Maximum number of hex characters required to represent values of either
    188 // templated type.
    189 template <typename Ta, typename Tb>
    190 static unsigned MaxHexCharCount() {
    191   unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb)));
    192   return (count * 8) / 4;
    193 }
    194 
    195 
    196 // Standard test dispatchers.
    197 
    198 
    199 static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs,
    200                            unsigned inputs_length, uintptr_t results,
    201                            unsigned d_size, unsigned n_size) {
    202   VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
    203   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
    204 
    205   SETUP();
    206   START();
    207 
    208   // Roll up the loop to keep the code size down.
    209   Label loop_n;
    210 
    211   Register out = x0;
    212   Register inputs_base = x1;
    213   Register length = w2;
    214   Register index_n = w3;
    215 
    216   const int n_index_shift =
    217       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    218 
    219   FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
    220   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
    221 
    222   __ Mov(out, results);
    223   __ Mov(inputs_base, inputs);
    224   __ Mov(length, inputs_length);
    225 
    226   __ Mov(index_n, 0);
    227   __ Bind(&loop_n);
    228   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
    229 
    230   {
    231     SingleEmissionCheckScope guard(&masm);
    232     (masm.*helper)(fd, fn);
    233   }
    234   __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
    235 
    236   __ Add(index_n, index_n, 1);
    237   __ Cmp(index_n, inputs_length);
    238   __ B(lo, &loop_n);
    239 
    240   END();
    241   RUN();
    242   TEARDOWN();
    243 }
    244 
    245 
    246 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    247 // rawbits representations of doubles or floats. This ensures that exact bit
    248 // comparisons can be performed.
    249 template <typename Tn, typename Td>
    250 static void Test1Op(const char * name, Test1OpFPHelper_t helper,
    251                     const Tn inputs[], unsigned inputs_length,
    252                     const Td expected[], unsigned expected_length) {
    253   VIXL_ASSERT(inputs_length > 0);
    254 
    255   const unsigned results_length = inputs_length;
    256   Td * results = new Td[results_length];
    257 
    258   const unsigned d_bits = sizeof(Td) * 8;
    259   const unsigned n_bits = sizeof(Tn) * 8;
    260 
    261   Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
    262                  reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
    263 
    264   if (Test::sim_test_trace()) {
    265     // Print the results.
    266     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    267     for (unsigned d = 0; d < results_length; d++) {
    268       printf("  0x%0*" PRIx64 ",\n",
    269              d_bits / 4, static_cast<uint64_t>(results[d]));
    270     }
    271     printf("};\n");
    272     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    273   } else {
    274     // Check the results.
    275     VIXL_CHECK(expected_length == results_length);
    276     unsigned error_count = 0;
    277     unsigned d = 0;
    278     for (unsigned n = 0; n < inputs_length; n++, d++) {
    279       if (results[d] != expected[d]) {
    280         if (++error_count > kErrorReportLimit) continue;
    281 
    282         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
    283                name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
    284                name, rawbits_to_fp(inputs[n]));
    285         printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    286                d_bits / 4, static_cast<uint64_t>(expected[d]),
    287                rawbits_to_fp(expected[d]));
    288         printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    289                d_bits / 4, static_cast<uint64_t>(results[d]),
    290                rawbits_to_fp(results[d]));
    291         printf("\n");
    292       }
    293     }
    294     VIXL_ASSERT(d == expected_length);
    295     if (error_count > kErrorReportLimit) {
    296       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    297     }
    298     VIXL_CHECK(error_count == 0);
    299   }
    300   delete[] results;
    301 }
    302 
    303 
    304 static void Test2Op_Helper(Test2OpFPHelper_t helper,
    305                            uintptr_t inputs, unsigned inputs_length,
    306                            uintptr_t results, unsigned reg_size) {
    307   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    308 
    309   SETUP();
    310   START();
    311 
    312   // Roll up the loop to keep the code size down.
    313   Label loop_n, loop_m;
    314 
    315   Register out = x0;
    316   Register inputs_base = x1;
    317   Register length = w2;
    318   Register index_n = w3;
    319   Register index_m = w4;
    320 
    321   bool double_op = reg_size == kDRegSize;
    322   const int index_shift =
    323       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    324 
    325   FPRegister fd = double_op ? d0 : s0;
    326   FPRegister fn = double_op ? d1 : s1;
    327   FPRegister fm = double_op ? d2 : s2;
    328 
    329   __ Mov(out, results);
    330   __ Mov(inputs_base, inputs);
    331   __ Mov(length, inputs_length);
    332 
    333   __ Mov(index_n, 0);
    334   __ Bind(&loop_n);
    335   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    336 
    337   __ Mov(index_m, 0);
    338   __ Bind(&loop_m);
    339   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    340 
    341   {
    342     SingleEmissionCheckScope guard(&masm);
    343     (masm.*helper)(fd, fn, fm);
    344   }
    345     __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
    346 
    347   __ Add(index_m, index_m, 1);
    348   __ Cmp(index_m, inputs_length);
    349   __ B(lo, &loop_m);
    350 
    351   __ Add(index_n, index_n, 1);
    352   __ Cmp(index_n, inputs_length);
    353   __ B(lo, &loop_n);
    354 
    355   END();
    356   RUN();
    357   TEARDOWN();
    358 }
    359 
    360 
    361 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    362 // rawbits representations of doubles or floats. This ensures that exact bit
    363 // comparisons can be performed.
    364 template <typename T>
    365 static void Test2Op(const char * name, Test2OpFPHelper_t helper,
    366                     const T inputs[], unsigned inputs_length,
    367                     const T expected[], unsigned expected_length) {
    368   VIXL_ASSERT(inputs_length > 0);
    369 
    370   const unsigned results_length = inputs_length * inputs_length;
    371   T * results = new T[results_length];
    372 
    373   const unsigned bits = sizeof(T) * 8;
    374 
    375   Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
    376                  reinterpret_cast<uintptr_t>(results), bits);
    377 
    378   if (Test::sim_test_trace()) {
    379     // Print the results.
    380     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    381     for (unsigned d = 0; d < results_length; d++) {
    382       printf("  0x%0*" PRIx64 ",\n",
    383              bits / 4, static_cast<uint64_t>(results[d]));
    384     }
    385     printf("};\n");
    386     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    387   } else {
    388     // Check the results.
    389     VIXL_CHECK(expected_length == results_length);
    390     unsigned error_count = 0;
    391     unsigned d = 0;
    392     for (unsigned n = 0; n < inputs_length; n++) {
    393       for (unsigned m = 0; m < inputs_length; m++, d++) {
    394         if (results[d] != expected[d]) {
    395           if (++error_count > kErrorReportLimit) continue;
    396 
    397           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
    398                  name,
    399                  bits / 4, static_cast<uint64_t>(inputs[n]),
    400                  bits / 4, static_cast<uint64_t>(inputs[m]),
    401                  name,
    402                  rawbits_to_fp(inputs[n]),
    403                  rawbits_to_fp(inputs[m]));
    404           printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    405                  bits / 4, static_cast<uint64_t>(expected[d]),
    406                  rawbits_to_fp(expected[d]));
    407           printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    408                  bits / 4, static_cast<uint64_t>(results[d]),
    409                  rawbits_to_fp(results[d]));
    410           printf("\n");
    411         }
    412       }
    413     }
    414     VIXL_ASSERT(d == expected_length);
    415     if (error_count > kErrorReportLimit) {
    416       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    417     }
    418     VIXL_CHECK(error_count == 0);
    419   }
    420   delete[] results;
    421 }
    422 
    423 
    424 static void Test3Op_Helper(Test3OpFPHelper_t helper,
    425                            uintptr_t inputs, unsigned inputs_length,
    426                            uintptr_t results, unsigned reg_size) {
    427   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    428 
    429   SETUP();
    430   START();
    431 
    432   // Roll up the loop to keep the code size down.
    433   Label loop_n, loop_m, loop_a;
    434 
    435   Register out = x0;
    436   Register inputs_base = x1;
    437   Register length = w2;
    438   Register index_n = w3;
    439   Register index_m = w4;
    440   Register index_a = w5;
    441 
    442   bool double_op = reg_size == kDRegSize;
    443   const int index_shift =
    444       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    445 
    446   FPRegister fd = double_op ? d0 : s0;
    447   FPRegister fn = double_op ? d1 : s1;
    448   FPRegister fm = double_op ? d2 : s2;
    449   FPRegister fa = double_op ? d3 : s3;
    450 
    451   __ Mov(out, results);
    452   __ Mov(inputs_base, inputs);
    453   __ Mov(length, inputs_length);
    454 
    455   __ Mov(index_n, 0);
    456   __ Bind(&loop_n);
    457   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    458 
    459   __ Mov(index_m, 0);
    460   __ Bind(&loop_m);
    461   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    462 
    463   __ Mov(index_a, 0);
    464   __ Bind(&loop_a);
    465   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
    466 
    467   {
    468     SingleEmissionCheckScope guard(&masm);
    469     (masm.*helper)(fd, fn, fm, fa);
    470   }
    471   __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
    472 
    473   __ Add(index_a, index_a, 1);
    474   __ Cmp(index_a, inputs_length);
    475   __ B(lo, &loop_a);
    476 
    477   __ Add(index_m, index_m, 1);
    478   __ Cmp(index_m, inputs_length);
    479   __ B(lo, &loop_m);
    480 
    481   __ Add(index_n, index_n, 1);
    482   __ Cmp(index_n, inputs_length);
    483   __ B(lo, &loop_n);
    484 
    485   END();
    486   RUN();
    487   TEARDOWN();
    488 }
    489 
    490 
    491 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    492 // rawbits representations of doubles or floats. This ensures that exact bit
    493 // comparisons can be performed.
    494 template <typename T>
    495 static void Test3Op(const char * name, Test3OpFPHelper_t helper,
    496                     const T inputs[], unsigned inputs_length,
    497                     const T expected[], unsigned expected_length) {
    498   VIXL_ASSERT(inputs_length > 0);
    499 
    500   const unsigned results_length = inputs_length * inputs_length * inputs_length;
    501   T * results = new T[results_length];
    502 
    503   const unsigned bits = sizeof(T) * 8;
    504 
    505   Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
    506                  reinterpret_cast<uintptr_t>(results), bits);
    507 
    508   if (Test::sim_test_trace()) {
    509     // Print the results.
    510     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    511     for (unsigned d = 0; d < results_length; d++) {
    512       printf("  0x%0*" PRIx64 ",\n",
    513              bits / 4, static_cast<uint64_t>(results[d]));
    514     }
    515     printf("};\n");
    516     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    517   } else {
    518     // Check the results.
    519     VIXL_CHECK(expected_length == results_length);
    520     unsigned error_count = 0;
    521     unsigned d = 0;
    522     for (unsigned n = 0; n < inputs_length; n++) {
    523       for (unsigned m = 0; m < inputs_length; m++) {
    524         for (unsigned a = 0; a < inputs_length; a++, d++) {
    525           if (results[d] != expected[d]) {
    526             if (++error_count > kErrorReportLimit) continue;
    527 
    528             printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
    529                    " (%s %g %g %g):\n",
    530                    name,
    531                    bits / 4, static_cast<uint64_t>(inputs[n]),
    532                    bits / 4, static_cast<uint64_t>(inputs[m]),
    533                    bits / 4, static_cast<uint64_t>(inputs[a]),
    534                    name,
    535                    rawbits_to_fp(inputs[n]),
    536                    rawbits_to_fp(inputs[m]),
    537                    rawbits_to_fp(inputs[a]));
    538             printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    539                    bits / 4, static_cast<uint64_t>(expected[d]),
    540                    rawbits_to_fp(expected[d]));
    541             printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    542                    bits / 4, static_cast<uint64_t>(results[d]),
    543                    rawbits_to_fp(results[d]));
    544             printf("\n");
    545           }
    546         }
    547       }
    548     }
    549     VIXL_ASSERT(d == expected_length);
    550     if (error_count > kErrorReportLimit) {
    551       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    552     }
    553     VIXL_CHECK(error_count == 0);
    554   }
    555   delete[] results;
    556 }
    557 
    558 
    559 static void TestCmp_Helper(TestFPCmpHelper_t helper,
    560                            uintptr_t inputs, unsigned inputs_length,
    561                            uintptr_t results, unsigned reg_size) {
    562   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    563 
    564   SETUP();
    565   START();
    566 
    567   // Roll up the loop to keep the code size down.
    568   Label loop_n, loop_m;
    569 
    570   Register out = x0;
    571   Register inputs_base = x1;
    572   Register length = w2;
    573   Register index_n = w3;
    574   Register index_m = w4;
    575   Register flags = x5;
    576 
    577   bool double_op = reg_size == kDRegSize;
    578   const int index_shift =
    579       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    580 
    581   FPRegister fn = double_op ? d1 : s1;
    582   FPRegister fm = double_op ? d2 : s2;
    583 
    584   __ Mov(out, results);
    585   __ Mov(inputs_base, inputs);
    586   __ Mov(length, inputs_length);
    587 
    588   __ Mov(index_n, 0);
    589   __ Bind(&loop_n);
    590   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    591 
    592   __ Mov(index_m, 0);
    593   __ Bind(&loop_m);
    594   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    595 
    596   {
    597     SingleEmissionCheckScope guard(&masm);
    598     (masm.*helper)(fn, fm);
    599   }
    600   __ Mrs(flags, NZCV);
    601   __ Ubfx(flags, flags, 28, 4);
    602   __ Strb(flags, MemOperand(out, 1, PostIndex));
    603 
    604   __ Add(index_m, index_m, 1);
    605   __ Cmp(index_m, inputs_length);
    606   __ B(lo, &loop_m);
    607 
    608   __ Add(index_n, index_n, 1);
    609   __ Cmp(index_n, inputs_length);
    610   __ B(lo, &loop_n);
    611 
    612   END();
    613   RUN();
    614   TEARDOWN();
    615 }
    616 
    617 
    618 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    619 // rawbits representations of doubles or floats. This ensures that exact bit
    620 // comparisons can be performed.
    621 template <typename T>
    622 static void TestCmp(const char * name, TestFPCmpHelper_t helper,
    623                     const T inputs[], unsigned inputs_length,
    624                     const uint8_t expected[], unsigned expected_length) {
    625   VIXL_ASSERT(inputs_length > 0);
    626 
    627   const unsigned results_length = inputs_length * inputs_length;
    628   uint8_t * results = new uint8_t[results_length];
    629 
    630   const unsigned bits = sizeof(T) * 8;
    631 
    632   TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
    633                  reinterpret_cast<uintptr_t>(results), bits);
    634 
    635   if (Test::sim_test_trace()) {
    636     // Print the results.
    637     printf("const uint8_t kExpected_%s[] = {\n", name);
    638     for (unsigned d = 0; d < results_length; d++) {
    639       // Each NZCV result only requires 4 bits.
    640       VIXL_ASSERT((results[d] & 0xf) == results[d]);
    641       printf("  0x%" PRIx8 ",\n", results[d]);
    642     }
    643     printf("};\n");
    644     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    645   } else {
    646     // Check the results.
    647     VIXL_CHECK(expected_length == results_length);
    648     unsigned error_count = 0;
    649     unsigned d = 0;
    650     for (unsigned n = 0; n < inputs_length; n++) {
    651       for (unsigned m = 0; m < inputs_length; m++, d++) {
    652         if (results[d] != expected[d]) {
    653           if (++error_count > kErrorReportLimit) continue;
    654 
    655           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
    656                  name,
    657                  bits / 4, static_cast<uint64_t>(inputs[n]),
    658                  bits / 4, static_cast<uint64_t>(inputs[m]),
    659                  name,
    660                  rawbits_to_fp(inputs[n]),
    661                  rawbits_to_fp(inputs[m]));
    662           printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
    663                  (expected[d] & 0x8) ? 'N' : 'n',
    664                  (expected[d] & 0x4) ? 'Z' : 'z',
    665                  (expected[d] & 0x2) ? 'C' : 'c',
    666                  (expected[d] & 0x1) ? 'V' : 'v',
    667                  expected[d]);
    668           printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
    669                  (results[d] & 0x8) ? 'N' : 'n',
    670                  (results[d] & 0x4) ? 'Z' : 'z',
    671                  (results[d] & 0x2) ? 'C' : 'c',
    672                  (results[d] & 0x1) ? 'V' : 'v',
    673                  results[d]);
    674           printf("\n");
    675         }
    676       }
    677     }
    678     VIXL_ASSERT(d == expected_length);
    679     if (error_count > kErrorReportLimit) {
    680       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    681     }
    682     VIXL_CHECK(error_count == 0);
    683   }
    684   delete[] results;
    685 }
    686 
    687 
    688 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
    689                                uintptr_t inputs, unsigned inputs_length,
    690                                uintptr_t results, unsigned reg_size) {
          // Builds a loop that loads each of the `inputs_length` FP values found at
          // address `inputs`, applies `helper` to that value and the literal 0.0, and
          // stores the resulting NZCV flags as one byte per input at address
          // `results`. `reg_size` selects the D-register (kDRegSize) or S-register
          // (kSRegSize) form; any other value trips the assertion below.
          //
          // NOTE(review): SETUP/START/END/RUN/TEARDOWN are macros defined outside
          // this chunk; presumably they create the assembler, bracket the generated
          // code, execute it and clean up -- confirm against their definitions.
    691   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    692 
    693   SETUP();
    694   START();
    695 
    696   // Roll up the loop to keep the code size down.
          // NOTE(review): loop_m is declared but never bound or branched to here.
    697   Label loop_n, loop_m;
    698 
    699   Register out = x0;
    700   Register inputs_base = x1;
          // NOTE(review): `length` is initialised below, but the loop-exit compare
          // uses the immediate `inputs_length` rather than this register.
    701   Register length = w2;
    702   Register index_n = w3;
    703   Register flags = x4;
    704 
    705   bool double_op = reg_size == kDRegSize;
          // Shift applied to index_n when addressing inputs[index_n].
    706   const int index_shift =
    707       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    708 
    709   FPRegister fn = double_op ? d1 : s1;
    710 
    711   __ Mov(out, results);
    712   __ Mov(inputs_base, inputs);
    713   __ Mov(length, inputs_length);
    714 
    715   __ Mov(index_n, 0);
    716   __ Bind(&loop_n);
          // Load inputs[index_n] into fn.
    717   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    718 
    719   {
            // NOTE(review): the scope presumably checks that `helper` emits exactly
            // one instruction -- confirm against SingleEmissionCheckScope.
    720     SingleEmissionCheckScope guard(&masm);
    721     (masm.*helper)(fn, 0.0);
    722   }
          // Read the flags and keep only the four-bit field that starts at bit 28, so
          // that the stored byte carries the flags in its low four bits.
    723   __ Mrs(flags, NZCV);
    724   __ Ubfx(flags, flags, 28, 4);
          // Store one byte and advance `out` by one.
    725   __ Strb(flags, MemOperand(out, 1, PostIndex));
    726 
          // Repeat while index_n is (unsigned) lower than inputs_length.
    727   __ Add(index_n, index_n, 1);
    728   __ Cmp(index_n, inputs_length);
    729   __ B(lo, &loop_n);
    730 
    731   END();
    732   RUN();
    733   TEARDOWN();
    734 }
    735 
    736 
    737 // Test FP compare-with-zero instructions. The inputs[] array should be an
    738 // array of rawbits representations of doubles or floats, so that exact bit
    739 // patterns are used. The expected[] array holds one byte per input, with the
        // N, Z, C and V flags in bits 3, 2, 1 and 0 respectively.
        //
        // When Test::sim_test_trace() is set, the results are printed as a C array
        // definition instead of being compared against expected[].
    740 template <typename T>
    741 static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper,
    742                         const T inputs[], unsigned inputs_length,
    743                         const uint8_t expected[], unsigned expected_length) {
    744   VIXL_ASSERT(inputs_length > 0);
    745 
          // One flags byte is produced per input.
    746   const unsigned results_length = inputs_length;
    747   uint8_t * results = new uint8_t[results_length];
    748 
          // Width of T in bits: passed to the helper as the register size, and used
          // (as bits / 4 hex digits) for zero-padding in the error report.
    749   const unsigned bits = sizeof(T) * 8;
    750 
    751   TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
    752                      reinterpret_cast<uintptr_t>(results), bits);
    753 
    754   if (Test::sim_test_trace()) {
    755     // Print the results.
    756     printf("const uint8_t kExpected_%s[] = {\n", name);
    757     for (unsigned d = 0; d < results_length; d++) {
    758       // Each NZCV result only requires 4 bits.
    759       VIXL_ASSERT((results[d] & 0xf) == results[d]);
    760       printf("  0x%" PRIx8 ",\n", results[d]);
    761     }
    762     printf("};\n");
    763     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    764   } else {
    765     // Check the results.
    766     VIXL_CHECK(expected_length == results_length);
    767     unsigned error_count = 0;
            // d indexes results[] and expected[]; it advances in step with n.
    768     unsigned d = 0;
    769     for (unsigned n = 0; n < inputs_length; n++, d++) {
    770       if (results[d] != expected[d]) {
                // Keep counting mismatches, but only print the first
                // kErrorReportLimit of them.
    771         if (++error_count > kErrorReportLimit) continue;
    772 
                // The second operand is always zero; it is printed at the same hex
                // width as the input.
    773         printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
    774                name,
    775                bits / 4, static_cast<uint64_t>(inputs[n]),
    776                bits / 4, 0,
    777                name,
    778                rawbits_to_fp(inputs[n]));
                // Upper-case letters mark set flags, lower-case letters clear ones.
    779         printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
    780                (expected[d] & 0x8) ? 'N' : 'n',
    781                (expected[d] & 0x4) ? 'Z' : 'z',
    782                (expected[d] & 0x2) ? 'C' : 'c',
    783                (expected[d] & 0x1) ? 'V' : 'v',
    784                expected[d]);
    785         printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
    786                (results[d] & 0x8) ? 'N' : 'n',
    787                (results[d] & 0x4) ? 'Z' : 'z',
    788                (results[d] & 0x2) ? 'C' : 'c',
    789                (results[d] & 0x1) ? 'V' : 'v',
    790                results[d]);
    791         printf("\n");
    792       }
    793     }
    794     VIXL_ASSERT(d == expected_length);
    795     if (error_count > kErrorReportLimit) {
    796       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    797     }
    798     VIXL_CHECK(error_count == 0);
    799   }
    800   delete[] results;
    801 }
    802 
    803 
    804 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
    805                                  uintptr_t inputs, unsigned inputs_length,
    806                                  uintptr_t results,
    807                                  unsigned d_size, unsigned n_size) {
          // Builds a loop over the `inputs_length` FP values at address `inputs`.
          // For each input, `helper` is emitted once for every fbits value from 0 to
          // d_size inclusive, and each integer result is stored in turn at address
          // `results`. The caller's buffer therefore needs (d_size + 1) results per
          // input. `d_size` selects an X or W destination and `n_size` a D or S
          // source; other values trip the assertions below.
          //
          // NOTE(review): SETUP/START/END/RUN/TEARDOWN are macros defined outside
          // this chunk; presumably they create the assembler, bracket the generated
          // code, execute it and clean up -- confirm against their definitions.
    808   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
    809   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
    810 
    811   SETUP();
    812   START();
    813 
    814   // Roll up the loop to keep the code size down.
    815   Label loop_n;
    816 
    817   Register out = x0;
    818   Register inputs_base = x1;
          // NOTE(review): `length` is initialised below, but the loop-exit compare
          // uses the immediate `inputs_length` rather than this register.
    819   Register length = w2;
    820   Register index_n = w3;
    821 
          // Shift applied to index_n when addressing inputs[index_n].
    822   const int n_index_shift =
    823       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    824 
    825   Register rd = (d_size == kXRegSize) ? x10 : w10;
    826   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
    827 
    828   __ Mov(out, results);
    829   __ Mov(inputs_base, inputs);
    830   __ Mov(length, inputs_length);
    831 
    832   __ Mov(index_n, 0);
    833   __ Bind(&loop_n);
    834   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
    835 
          // This C++ loop is unrolled into the generated code: one helper
          // instruction and one store per fbits value.
    836   for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
    837     {
              // NOTE(review): the scope presumably checks that `helper` emits exactly
              // one instruction -- confirm against SingleEmissionCheckScope.
    838       SingleEmissionCheckScope guard(&masm);
    839       (masm.*helper)(rd, fn, fbits);
    840     }
            // Store the result and advance `out` by the size of rd.
    841     __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
    842   }
    843 
          // Repeat while index_n is (unsigned) lower than inputs_length.
    844   __ Add(index_n, index_n, 1);
    845   __ Cmp(index_n, inputs_length);
    846   __ B(lo, &loop_n);
    847 
    848   END();
    849   RUN();
    850   TEARDOWN();
    851 }
    852 
    853 
    854 static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs,
    855                                unsigned inputs_length, uintptr_t results,
    856                                unsigned d_size, unsigned n_size) {
          // Builds a loop over the `inputs_length` FP values at address `inputs`,
          // emitting `helper` once per input and storing each integer result in turn
          // at address `results` (one result per input). `d_size` selects an X or W
          // destination and `n_size` a D or S source; other values trip the
          // assertions below.
          //
          // NOTE(review): SETUP/START/END/RUN/TEARDOWN are macros defined outside
          // this chunk; presumably they create the assembler, bracket the generated
          // code, execute it and clean up -- confirm against their definitions.
    857   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
    858   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
    859 
    860   SETUP();
    861   START();
    862 
    863   // Roll up the loop to keep the code size down.
    864   Label loop_n;
    865 
    866   Register out = x0;
    867   Register inputs_base = x1;
          // NOTE(review): `length` is initialised below, but the loop-exit compare
          // uses the immediate `inputs_length` rather than this register.
    868   Register length = w2;
    869   Register index_n = w3;
    870 
          // Shift applied to index_n when addressing inputs[index_n].
    871   const int n_index_shift =
    872       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    873 
    874   Register rd = (d_size == kXRegSize) ? x10 : w10;
    875   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
    876 
    877   __ Mov(out, results);
    878   __ Mov(inputs_base, inputs);
    879   __ Mov(length, inputs_length);
    880 
    881   __ Mov(index_n, 0);
    882   __ Bind(&loop_n);
    883   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
    884 
    885   {
            // NOTE(review): the scope presumably checks that `helper` emits exactly
            // one instruction -- confirm against SingleEmissionCheckScope.
    886     SingleEmissionCheckScope guard(&masm);
    887     (masm.*helper)(rd, fn);
    888   }
          // Store the result and advance `out` by the size of rd.
    889   __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
    890 
          // Repeat while index_n is (unsigned) lower than inputs_length.
    891   __ Add(index_n, index_n, 1);
    892   __ Cmp(index_n, inputs_length);
    893   __ B(lo, &loop_n);
    894 
    895   END();
    896   RUN();
    897   TEARDOWN();
    898 }
    899 
    900 
    901 // Test FP instructions.
    902 //  - The inputs[] array should be an array of rawbits representations of
    903 //    doubles or floats. This ensures that exact bit comparisons can be
    904 //    performed.
    905 //  - The expected[] array should be an array of signed integers.
        //
        // One result of type Td is produced per input. When Test::sim_test_trace()
        // is set, the results are printed as a C array definition instead of being
        // compared against expected[].
    906 template <typename Tn, typename Td>
    907 static void TestFPToS(const char * name, TestFPToIntHelper_t helper,
    908                       const Tn inputs[], unsigned inputs_length,
    909                       const Td expected[], unsigned expected_length) {
    910   VIXL_ASSERT(inputs_length > 0);
    911 
    912   const unsigned results_length = inputs_length;
    913   Td * results = new Td[results_length];
    914 
          // Bit widths of the destination and source types; these are passed to the
          // helper as register sizes and also set the hex widths in the report.
    915   const unsigned d_bits = sizeof(Td) * 8;
    916   const unsigned n_bits = sizeof(Tn) * 8;
    917 
    918   TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
    919                      reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
    920 
    921   if (Test::sim_test_trace()) {
    922     // Print the results.
    923     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    924     // There is no simple C++ literal for INT*_MIN that doesn't produce
    925     // warnings, so we use an appropriate constant in that case instead.
    926     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
    927     // the like) avoids warnings about comparing values with differing ranges.
    928     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
    929     const int64_t int_d_min = -(int_d_max) - 1;
    930     for (unsigned d = 0; d < results_length; d++) {
    931       if (results[d] == int_d_min) {
                // Emit the minimum as "-INT<d_bits>_C(max) - 1".
    932         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
    933       } else {
    934         // Some constants (such as those between INT32_MAX and UINT32_MAX)
    935         // trigger compiler warnings. To avoid these warnings, use an
    936         // appropriate macro to make the type explicit.
    937         int64_t result_int64 = static_cast<int64_t>(results[d]);
    938         if (result_int64 >= 0) {
    939           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
    940         } else {
                  // Negative values are printed as the negation of a positive
                  // literal; the minimum value was already handled above.
    941           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
    942         }
    943       }
    944     }
    945     printf("};\n");
    946     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    947   } else {
    948     // Check the results.
    949     VIXL_CHECK(expected_length == results_length);
    950     unsigned error_count = 0;
            // d indexes results[] and expected[]; it advances in step with n.
    951     unsigned d = 0;
    952     for (unsigned n = 0; n < inputs_length; n++, d++) {
    953       if (results[d] != expected[d]) {
                // Keep counting mismatches, but only print the first
                // kErrorReportLimit of them.
    954         if (++error_count > kErrorReportLimit) continue;
    955 
    956         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
    957                name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
    958                name, rawbits_to_fp(inputs[n]));
    959         printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
    960                d_bits / 4, static_cast<uint64_t>(expected[d]),
    961                static_cast<int64_t>(expected[d]));
    962         printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
    963                d_bits / 4, static_cast<uint64_t>(results[d]),
    964                static_cast<int64_t>(results[d]));
    965         printf("\n");
    966       }
    967     }
    968     VIXL_ASSERT(d == expected_length);
    969     if (error_count > kErrorReportLimit) {
    970       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    971     }
    972     VIXL_CHECK(error_count == 0);
    973   }
    974   delete[] results;
    975 }
    976 
    977 
    978 // Test FP instructions.
    979 //  - The inputs[] array should be an array of rawbits representations of
    980 //    doubles or floats. This ensures that exact bit comparisons can be
    981 //    performed.
    982 //  - The expected[] array should be an array of unsigned integers.
        //
        // One result of type Td is produced per input. When Test::sim_test_trace()
        // is set, the results are printed as a C array definition instead of being
        // compared against expected[].
    983 template <typename Tn, typename Td>
    984 static void TestFPToU(const char * name, TestFPToIntHelper_t helper,
    985                       const Tn inputs[], unsigned inputs_length,
    986                       const Td expected[], unsigned expected_length) {
    987   VIXL_ASSERT(inputs_length > 0);
    988 
    989   const unsigned results_length = inputs_length;
    990   Td * results = new Td[results_length];
    991 
          // Bit widths of the destination and source types; these are passed to the
          // helper as register sizes and also set the hex widths in the report.
    992   const unsigned d_bits = sizeof(Td) * 8;
    993   const unsigned n_bits = sizeof(Tn) * 8;
    994 
    995   TestFPToInt_Helper(helper,
    996                      reinterpret_cast<uintptr_t>(inputs), inputs_length,
    997                      reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
    998 
    999   if (Test::sim_test_trace()) {
   1000     // Print the results.
   1001     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
   1002     for (unsigned d = 0; d < results_length; d++) {
              // Each value is printed in decimal with a 'u' suffix.
   1003       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
   1004     }
   1005     printf("};\n");
   1006     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1007   } else {
   1008     // Check the results.
   1009     VIXL_CHECK(expected_length == results_length);
   1010     unsigned error_count = 0;
            // d indexes results[] and expected[]; it advances in step with n.
   1011     unsigned d = 0;
   1012     for (unsigned n = 0; n < inputs_length; n++, d++) {
   1013       if (results[d] != expected[d]) {
                // Keep counting mismatches, but only print the first
                // kErrorReportLimit of them.
   1014         if (++error_count > kErrorReportLimit) continue;
   1015 
   1016         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
   1017                name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
   1018                name, rawbits_to_fp(inputs[n]));
   1019         printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1020                d_bits / 4, static_cast<uint64_t>(expected[d]),
   1021                static_cast<uint64_t>(expected[d]));
   1022         printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1023                d_bits / 4, static_cast<uint64_t>(results[d]),
   1024                static_cast<uint64_t>(results[d]));
   1025         printf("\n");
   1026       }
   1027     }
   1028     VIXL_ASSERT(d == expected_length);
   1029     if (error_count > kErrorReportLimit) {
   1030       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1031     }
   1032     VIXL_CHECK(error_count == 0);
   1033   }
   1034   delete[] results;
   1035 }
   1036 
   1037 
   1038 // Test FP instructions.
   1039 //  - The inputs[] array should be an array of rawbits representations of
   1040 //    doubles or floats. This ensures that exact bit comparisons can be
   1041 //    performed.
   1042 //  - The expected[] array should be an array of signed integers.
        //
        // Each input is converted once for every fbits value from 0 to the bit width
        // of Td inclusive, so (d_bits + 1) results are produced per input, stored
        // consecutively. When Test::sim_test_trace() is set, the results are printed
        // as a C array definition instead of being compared against expected[].
   1043 template <typename Tn, typename Td>
   1044 static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper,
   1045                            const Tn inputs[], unsigned inputs_length,
   1046                            const Td expected[], unsigned expected_length) {
   1047   VIXL_ASSERT(inputs_length > 0);
   1048 
   1049   const unsigned d_bits = sizeof(Td) * 8;
   1050   const unsigned n_bits = sizeof(Tn) * 8;
   1051 
          // (d_bits + 1) results per input: one for each fbits in [0, d_bits].
   1052   const unsigned results_length = inputs_length * (d_bits + 1);
   1053   Td * results = new Td[results_length];
   1054 
   1055   TestFPToFixed_Helper(helper,
   1056                        reinterpret_cast<uintptr_t>(inputs), inputs_length,
   1057                        reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
   1058 
   1059   if (Test::sim_test_trace()) {
   1060     // Print the results.
   1061     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
   1062     // There is no simple C++ literal for INT*_MIN that doesn't produce
   1063     // warnings, so we use an appropriate constant in that case instead.
   1064     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
   1065     // the like) avoids warnings about comparing values with differing ranges.
   1066     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
   1067     const int64_t int_d_min = -(int_d_max) - 1;
   1068     for (unsigned d = 0; d < results_length; d++) {
   1069       if (results[d] == int_d_min) {
                // Emit the minimum as "-INT<d_bits>_C(max) - 1".
   1070         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
   1071       } else {
   1072         // Some constants (such as those between INT32_MAX and UINT32_MAX)
   1073         // trigger compiler warnings. To avoid these warnings, use an
   1074         // appropriate macro to make the type explicit.
   1075         int64_t result_int64 = static_cast<int64_t>(results[d]);
   1076         if (result_int64 >= 0) {
   1077           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
   1078         } else {
                  // Negative values are printed as the negation of a positive
                  // literal; the minimum value was already handled above.
   1079           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
   1080         }
   1081       }
   1082     }
   1083     printf("};\n");
   1084     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1085   } else {
   1086     // Check the results.
   1087     VIXL_CHECK(expected_length == results_length);
   1088     unsigned error_count = 0;
            // d is the flat index into results[] and expected[]; it advances once
            // per (input, fbits) pair.
   1089     unsigned d = 0;
   1090     for (unsigned n = 0; n < inputs_length; n++) {
   1091       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
   1092         if (results[d] != expected[d]) {
                  // Keep counting mismatches, but only print the first
                  // kErrorReportLimit of them.
   1093           if (++error_count > kErrorReportLimit) continue;
   1094 
   1095           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
   1096                  name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
   1097                  name, rawbits_to_fp(inputs[n]), fbits);
   1098           printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
   1099                  d_bits / 4, static_cast<uint64_t>(expected[d]),
   1100                  static_cast<int64_t>(expected[d]));
   1101           printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
   1102                  d_bits / 4, static_cast<uint64_t>(results[d]),
   1103                  static_cast<int64_t>(results[d]));
   1104           printf("\n");
   1105         }
   1106       }
   1107     }
   1108     VIXL_ASSERT(d == expected_length);
   1109     if (error_count > kErrorReportLimit) {
   1110       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1111     }
   1112     VIXL_CHECK(error_count == 0);
   1113   }
   1114   delete[] results;
   1115 }
   1116 
   1117 
   1118 // Test FP instructions.
   1119 //  - The inputs[] array should be an array of rawbits representations of
   1120 //    doubles or floats. This ensures that exact bit comparisons can be
   1121 //    performed.
   1122 //  - The expected[] array should be an array of unsigned integers.
        //
        // Each input is converted once for every fbits value from 0 to the bit width
        // of Td inclusive, so (d_bits + 1) results are produced per input, stored
        // consecutively. When Test::sim_test_trace() is set, the results are printed
        // as a C array definition instead of being compared against expected[].
   1123 template <typename Tn, typename Td>
   1124 static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper,
   1125                            const Tn inputs[], unsigned inputs_length,
   1126                            const Td expected[], unsigned expected_length) {
   1127   VIXL_ASSERT(inputs_length > 0);
   1128 
   1129   const unsigned d_bits = sizeof(Td) * 8;
   1130   const unsigned n_bits = sizeof(Tn) * 8;
   1131 
          // (d_bits + 1) results per input: one for each fbits in [0, d_bits].
   1132   const unsigned results_length = inputs_length * (d_bits + 1);
   1133   Td * results = new Td[results_length];
   1134 
   1135   TestFPToFixed_Helper(helper,
   1136                        reinterpret_cast<uintptr_t>(inputs), inputs_length,
   1137                        reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
   1138 
   1139   if (Test::sim_test_trace()) {
   1140     // Print the results.
   1141     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
   1142     for (unsigned d = 0; d < results_length; d++) {
              // Each value is printed in decimal with a 'u' suffix.
   1143       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
   1144     }
   1145     printf("};\n");
   1146     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1147   } else {
   1148     // Check the results.
   1149     VIXL_CHECK(expected_length == results_length);
   1150     unsigned error_count = 0;
            // d is the flat index into results[] and expected[]; it advances once
            // per (input, fbits) pair.
   1151     unsigned d = 0;
   1152     for (unsigned n = 0; n < inputs_length; n++) {
   1153       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
   1154         if (results[d] != expected[d]) {
                  // Keep counting mismatches, but only print the first
                  // kErrorReportLimit of them.
   1155           if (++error_count > kErrorReportLimit) continue;
   1156 
   1157           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
   1158                  name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
   1159                  name, rawbits_to_fp(inputs[n]), fbits);
   1160           printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1161                  d_bits / 4, static_cast<uint64_t>(expected[d]),
   1162                  static_cast<uint64_t>(expected[d]));
   1163           printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1164                  d_bits / 4, static_cast<uint64_t>(results[d]),
   1165                  static_cast<uint64_t>(results[d]));
   1166           printf("\n");
   1167         }
   1168       }
   1169     }
   1170     VIXL_ASSERT(d == expected_length);
   1171     if (error_count > kErrorReportLimit) {
   1172       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1173     }
   1174     VIXL_CHECK(error_count == 0);
   1175   }
   1176   delete[] results;
   1177 }
   1178 
   1179 
   1180 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
   1181 
   1182 
   1183 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
   1184                                uintptr_t inputs_n, unsigned inputs_n_length,
   1185                                uintptr_t results,
   1186                                VectorFormat vd_form,
   1187                                VectorFormat vn_form) {
          // Builds a loop that runs `helper` once per input lane. The source register
          // is first loaded with the last 16 bytes of the input array; each iteration
          // then loads one more lane and combines it into the source with an 'ext'
          // before emitting `helper` and storing the whole destination register at
          // address `results`.
          //
          // NOTE(review): the initial load reads 16 bytes ending at the end of the
          // input array, so the array is presumably required to be at least 16 bytes
          // long -- nothing here checks that.
          // NOTE(review): SETUP/START/END/RUN/TEARDOWN are macros defined outside
          // this chunk; presumably they create the assembler, bracket the generated
          // code, execute it and clean up -- confirm against their definitions.
   1188   VIXL_ASSERT(vd_form != kFormatUndefined);
   1189   VIXL_ASSERT(vn_form != kFormatUndefined);
   1190 
   1191   SETUP();
   1192   START();
   1193 
   1194   // Roll up the loop to keep the code size down.
   1195   Label loop_n;
   1196 
   1197   Register out = x0;
   1198   Register inputs_n_base = x1;
   1199   Register inputs_n_last_16bytes = x3;
   1200   Register index_n = x5;
   1201 
   1202   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1203   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1204   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1205 
   1206   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1207   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1208   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1209   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1210   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1211 
   1212 
   1213   // These will be either a D- or a Q-register form, with a single lane
   1214   // (for use in scalar load and store operations).
          // Only vd is sized from vd_bits here; vn and vntmp are always the 16-byte
          // (V16B) view of v1 and v3, as used by the 'ext' below.
   1215   VRegister vd = VRegister(0, vd_bits);
   1216   VRegister vn = v1.V16B();
   1217   VRegister vntmp = v3.V16B();
   1218 
   1219   // These will have the correct format for use when calling 'helper'.
   1220   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
   1221   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1222 
   1223   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1224   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1225 
   1226   __ Mov(out, results);
   1227 
   1228   __ Mov(inputs_n_base, inputs_n);
          // Address of the final 16 bytes of the input array.
   1229   __ Mov(inputs_n_last_16bytes,
   1230          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
   1231 
          // Seed the source register with those final 16 bytes.
   1232   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   1233 
   1234   __ Mov(index_n, 0);
   1235   __ Bind(&loop_n);
   1236 
          // Load inputs_n[index_n] as a single lane of register 3, then 'ext' it
          // into vn at a byte offset of one lane.
          // NOTE(review): this appears to slide the 16-byte window in vn along by
          // one lane per iteration -- confirm against the EXT instruction.
   1237   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
   1238                                   vn_lane_bytes_log2));
   1239   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   1240 
   1241   // Set the destination to zero.
   1242   // TODO: Setting the destination to values other than zero
   1243   //       might be a better test for instructions such as sqxtn2
   1244   //       which may leave parts of V registers unchanged.
   1245   __ Movi(vd.V16B(), 0);
   1246 
   1247   {
            // NOTE(review): the scope presumably checks that `helper` emits exactly
            // one instruction -- confirm against SingleEmissionCheckScope.
   1248     SingleEmissionCheckScope guard(&masm);
   1249     (masm.*helper)(vd_helper, vn_helper);
   1250   }
          // Store the destination register and advance `out` by its size in bytes.
   1251   __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
   1252 
          // Repeat while index_n is (unsigned) lower than inputs_n_length.
   1253   __ Add(index_n, index_n, 1);
   1254   __ Cmp(index_n, inputs_n_length);
   1255   __ B(lo, &loop_n);
   1256 
   1257   END();
   1258   RUN();
   1259   TEARDOWN();
   1260 }
   1261 
   1262 
   1263 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1264 // arrays of rawbit representation of input values. This ensures that
   1265 // exact bit comparisons can be performed.
        //
        //  - name:            instruction name, used in trace and error output.
        //  - helper:          MacroAssembler member that emits the instruction.
        //  - inputs_n[]:      input lanes; inputs_n_length is the number of lanes.
        //  - expected[]:      expected result lanes, vd_lane_count per iteration.
        //  - expected_length: number of result vectors; it is checked against
        //                     inputs_n_length, not against the number of lanes.
        //  - vd_form/vn_form: vector formats of the destination and the source.
        //
        // When Test::sim_test_trace() is set, the results are printed as a C array
        // definition instead of being compared against expected[].
   1266 template <typename Td, typename Tn>
   1267 static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper,
   1268                         const Tn inputs_n[], unsigned inputs_n_length,
   1269                         const Td expected[], unsigned expected_length,
   1270                         VectorFormat vd_form,
   1271                         VectorFormat vn_form) {
   1272   VIXL_ASSERT(inputs_n_length > 0);
   1273 
   1274   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1275   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1276   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1277 
          // One result vector (vd_lane_count lanes of Td) per input lane.
   1278   const unsigned results_length = inputs_n_length;
   1279   Td* results = new Td[results_length * vd_lane_count];
   1280   const unsigned lane_bit = sizeof(Td) * 8;
   1281   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   1282 
   1283   Test1OpNEON_Helper(helper,
   1284                      reinterpret_cast<uintptr_t>(inputs_n),
   1285                      inputs_n_length,
   1286                      reinterpret_cast<uintptr_t>(results),
   1287                      vd_form, vn_form);
   1288 
   1289   if (Test::sim_test_trace()) {
   1290     // Print the results.
   1291     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1292     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1293       printf(" ");
   1294       // Output a separate result for each element of the result vector.
   1295       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1296         unsigned index = lane + (iteration * vd_lane_count);
   1297         printf(" 0x%0*" PRIx64 ",",
   1298                lane_len_in_hex,
   1299                static_cast<uint64_t>(results[index]));
   1300       }
   1301       printf("\n");
   1302     }
   1303 
   1304     printf("};\n");
   1305     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1306            name,
   1307            results_length);
   1308   } else {
   1309     // Check the results.
   1310     VIXL_CHECK(expected_length == results_length);
   1311     unsigned error_count = 0;
   1312     unsigned d = 0;
   1313     const char* padding = "                    ";
   1314     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1315     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
   1316       bool error_in_vector = false;
   1317 
              // A vector is in error as soon as any one of its lanes differs.
   1318       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1319         unsigned output_index = (n * vd_lane_count) + lane;
   1320 
   1321         if (results[output_index] != expected[output_index]) {
   1322           error_in_vector = true;
   1323           break;
   1324         }
   1325       }
   1326 
   1327       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1328         printf("%s\n", name);
   1329         printf(" Vn%.*s| Vd%.*s| Expected\n",
   1330                 lane_len_in_hex+1, padding,
   1331                 lane_len_in_hex+1, padding);
   1332 
                // Index of the first input lane in the 16-byte window that was in
                // the source register for iteration n (taken modulo the input
                // length below).
   1333         const unsigned first_index_n =
   1334           inputs_n_length - (16 / vn_lane_bytes) + n + 1;
   1335 
   1336         for (unsigned lane = 0;
   1337              lane < std::max(vd_lane_count, vn_lane_count);
   1338              lane++) {
   1339           unsigned output_index = (n * vd_lane_count) + lane;
   1340           unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
   1341 
                  // When Vn has more lanes than Vd there is no result lane for this
                  // row. Indexing results[]/expected[] here would show lanes of the
                  // next vector and, for the last vector, read past the end of both
                  // arrays, so print the input column only.
                  if (lane >= vd_lane_count) {
                    printf(" 0x%0*" PRIx64 " |\n",
                           lane_len_in_hex,
                           static_cast<uint64_t>(inputs_n[input_index_n]));
                    continue;
                  }

   1342           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
   1343                   "| 0x%0*" PRIx64 "\n",
   1344                   results[output_index] != expected[output_index] ? '*' : ' ',
   1345                   lane_len_in_hex,
   1346                   static_cast<uint64_t>(inputs_n[input_index_n]),
   1347                   lane_len_in_hex,
   1348                   static_cast<uint64_t>(results[output_index]),
   1349                   lane_len_in_hex,
   1350                   static_cast<uint64_t>(expected[output_index]));
   1351         }
   1352       }
   1353     }
   1354     VIXL_ASSERT(d == expected_length);
   1355     if (error_count > kErrorReportLimit) {
   1356       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1357     }
   1358     VIXL_CHECK(error_count == 0);
   1359   }
   1360   delete[] results;
   1361 }
   1362 
   1363 
   1364 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
   1365 //      where <V> is one of B, H, S or D registers.
   1366 //      e.g. saddlv H1, v0.8B
   1367 
   1368 // TODO: Change tests to store all lanes of the resulting V register.
   1369 //       Some tests store all 128 bits of the resulting V register to
   1370 //       check the simulator's behaviour on the rest of the register.
   1371 //       This is better than storing the affected lanes only.
   1372 //       Change any tests such as the 'Across' template to do the same.
   1373 
   1374 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
   1375                                      uintptr_t inputs_n,
   1376                                      unsigned inputs_n_length,
   1377                                      uintptr_t results,
   1378                                      VectorFormat vd_form,
   1379                                      VectorFormat vn_form) {
          // Builds a loop that runs `helper` once per input lane. The source register
          // is first loaded with the last vn_lane_count lanes of the input array;
          // each iteration then loads one more lane and combines it into the source
          // with an 'ext' before emitting `helper` and storing the destination
          // register at address `results`.
          //
          // NOTE(review): the initial load address is computed from
          // (inputs_n_length - vn_lane_count), so the input array presumably must
          // hold at least vn_lane_count lanes -- nothing here checks that.
          // NOTE(review): SETUP/START/END/RUN/TEARDOWN are macros defined outside
          // this chunk; presumably they create the assembler, bracket the generated
          // code, execute it and clean up -- confirm against their definitions.
   1380   VIXL_ASSERT(vd_form != kFormatUndefined);
   1381   VIXL_ASSERT(vn_form != kFormatUndefined);
   1382 
   1383   SETUP();
   1384   START();
   1385 
   1386   // Roll up the loop to keep the code size down.
   1387   Label loop_n;
   1388 
   1389   Register out = x0;
   1390   Register inputs_n_base = x1;
   1391   Register inputs_n_last_vector = x3;
   1392   Register index_n = x5;
   1393 
   1394   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1395   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1396 
   1397   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1398   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1399   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1400   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1401   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1402 
   1403 
   1404   // These will be either a D- or a Q-register form, with a single lane
   1405   // (for use in scalar load and store operations).
   1406   VRegister vd = VRegister(0, vd_bits);
   1407   VRegister vn = VRegister(1, vn_bits);
   1408   VRegister vntmp = VRegister(3, vn_bits);
   1409 
   1410   // These will have the correct format for use when calling 'helper'.
          // Only the source needs a lane layout here; `helper` is passed vd itself.
   1411   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1412 
   1413   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1414   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1415 
   1416   // Same registers for use in the 'ext' instructions.
          // The byte-lane view (8B or 16B) is chosen to match the width of vn.
   1417   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
   1418   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
   1419 
   1420   __ Mov(out, results);
   1421 
   1422   __ Mov(inputs_n_base, inputs_n);
          // Address of the last vn_lane_count lanes of the input array.
   1423   __ Mov(inputs_n_last_vector,
   1424          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
   1425 
          // Seed the source register with that final vector.
   1426   __ Ldr(vn, MemOperand(inputs_n_last_vector));
   1427 
   1428   __ Mov(index_n, 0);
   1429   __ Bind(&loop_n);
   1430 
          // Load inputs_n[index_n] as a single lane of register 3, then 'ext' it
          // into vn at a byte offset of one lane.
          // NOTE(review): this appears to slide the window in vn along by one lane
          // per iteration -- confirm against the EXT instruction.
   1431   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
   1432                                   vn_lane_bytes_log2));
   1433   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
   1434 
   1435   // Set the destination to zero for tests such as '[r]shrn2'.
   1436   // TODO: Setting the destination to values other than zero
   1437   //       might be a better test for instructions such as sqxtn2
   1438   //       which may leave parts of V registers unchanged.
   1439   __ Movi(vd.V16B(), 0);
   1440 
   1441   {
            // NOTE(review): the scope presumably checks that `helper` emits exactly
            // one instruction -- confirm against SingleEmissionCheckScope.
   1442     SingleEmissionCheckScope guard(&masm);
   1443     (masm.*helper)(vd, vn_helper);
   1444   }
          // Store the destination register and advance `out` by its size in bytes.
   1445   __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
   1446 
          // Repeat while index_n is (unsigned) lower than inputs_n_length.
   1447   __ Add(index_n, index_n, 1);
   1448   __ Cmp(index_n, inputs_n_length);
   1449   __ B(lo, &loop_n);
   1450 
   1451   END();
   1452   RUN();
   1453   TEARDOWN();
   1454 }
   1455 
   1456 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1457 // arrays of rawbit representation of input values. This ensures that
   1458 // exact bit comparisons can be performed.
   1459 template <typename Td, typename Tn>
   1460 static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper,
   1461                               const Tn inputs_n[], unsigned inputs_n_length,
   1462                               const Td expected[], unsigned expected_length,
   1463                               VectorFormat vd_form,
   1464                               VectorFormat vn_form) {
   1465   VIXL_ASSERT(inputs_n_length > 0);
   1466 
   1467   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1468 
   1469   const unsigned results_length = inputs_n_length;
   1470   Td* results = new Td[results_length * vd_lane_count];
   1471   const unsigned lane_bit = sizeof(Td) * 8;
   1472   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   1473 
   1474   Test1OpAcrossNEON_Helper(helper,
   1475                            reinterpret_cast<uintptr_t>(inputs_n),
   1476                            inputs_n_length,
   1477                            reinterpret_cast<uintptr_t>(results),
   1478                            vd_form, vn_form);
   1479 
   1480   if (Test::sim_test_trace()) {
   1481     // Print the results.
   1482     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1483     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1484       printf(" ");
   1485       // Output a separate result for each element of the result vector.
   1486       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1487         unsigned index = lane + (iteration * vd_lane_count);
   1488         printf(" 0x%0*" PRIx64 ",",
   1489                lane_len_in_hex,
   1490                static_cast<uint64_t>(results[index]));
   1491       }
   1492       printf("\n");
   1493     }
   1494 
   1495     printf("};\n");
   1496     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1497            name,
   1498            results_length);
   1499   } else {
   1500     // Check the results.
   1501     VIXL_CHECK(expected_length == results_length);
   1502     unsigned error_count = 0;
   1503     unsigned d = 0;
   1504     const char* padding = "                    ";
   1505     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1506     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
   1507       bool error_in_vector = false;
   1508 
   1509       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1510         unsigned output_index = (n * vd_lane_count) + lane;
   1511 
   1512         if (results[output_index] != expected[output_index]) {
   1513           error_in_vector = true;
   1514           break;
   1515         }
   1516       }
   1517 
   1518       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1519         const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1520 
   1521         printf("%s\n", name);
   1522         printf(" Vn%.*s| Vd%.*s| Expected\n",
   1523                 lane_len_in_hex+1, padding,
   1524                 lane_len_in_hex+1, padding);
   1525 
   1526         // TODO: In case of an error, all tests print out as many elements as
   1527         //       there are lanes in the output or input vectors. This way
   1528         //       the viewer can read all the values that were needed for the
   1529         //       operation but the output contains also unnecessary values.
   1530         //       These prints can be improved according to the arguments
   1531         //       passed to test functions.
   1532         //       This output for the 'Across' category has the required
   1533         //       modifications.
   1534         for (unsigned lane = 0; lane < vn_lane_count; lane++) {
   1535           unsigned output_index = n * vd_lane_count;
   1536           unsigned input_index_n = (inputs_n_length - vn_lane_count +
   1537               n + 1 + lane) % inputs_n_length;
   1538 
   1539           if (vn_lane_count-1 == lane) {  // Is this the last lane?
   1540             // Print the result element(s) in the last lane only.
   1541             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
   1542                   "| 0x%0*" PRIx64 "\n",
   1543                   results[output_index] != expected[output_index] ? '*' : ' ',
   1544                   lane_len_in_hex,
   1545                   static_cast<uint64_t>(inputs_n[input_index_n]),
   1546                   lane_len_in_hex,
   1547                   static_cast<uint64_t>(results[output_index]),
   1548                   lane_len_in_hex,
   1549                   static_cast<uint64_t>(expected[output_index]));
   1550           } else {
   1551             printf(" 0x%0*" PRIx64 " |   %.*s|   %.*s\n",
   1552                   lane_len_in_hex,
   1553                   static_cast<uint64_t>(inputs_n[input_index_n]),
   1554                   lane_len_in_hex+1, padding,
   1555                   lane_len_in_hex+1, padding);
   1556           }
   1557         }
   1558       }
   1559     }
   1560     VIXL_ASSERT(d == expected_length);
   1561     if (error_count > kErrorReportLimit) {
   1562       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1563     }
   1564     VIXL_CHECK(error_count == 0);
   1565   }
   1566   delete[] results;
   1567 }
   1568 
   1569 
   1570 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
   1571 
   1572 // TODO: Iterate over inputs_d once the traces file is split.
   1573 
   1574 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
   1575                                uintptr_t inputs_d,
   1576                                uintptr_t inputs_n, unsigned inputs_n_length,
   1577                                uintptr_t inputs_m, unsigned inputs_m_length,
   1578                                uintptr_t results,
   1579                                VectorFormat vd_form,
   1580                                VectorFormat vn_form,
   1581                                VectorFormat vm_form) {
   1582   VIXL_ASSERT(vd_form != kFormatUndefined);
   1583   VIXL_ASSERT(vn_form != kFormatUndefined);
   1584   VIXL_ASSERT(vm_form != kFormatUndefined);
   1585 
   1586   SETUP();
   1587   START();
   1588 
   1589   // Roll up the loop to keep the code size down.
   1590   Label loop_n, loop_m;
   1591 
   1592   Register out = x0;
   1593   Register inputs_n_base = x1;
   1594   Register inputs_m_base = x2;
   1595   Register inputs_d_base = x3;
   1596   Register inputs_n_last_16bytes = x4;
   1597   Register inputs_m_last_16bytes = x5;
   1598   Register index_n = x6;
   1599   Register index_m = x7;
   1600 
   1601   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1602   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1603   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1604 
   1605   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1606   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1607   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1608   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1609   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1610 
   1611   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
   1612   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
   1613   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
   1614   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
   1615   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
   1616 
   1617 
   1618   // Always load and store 128 bits regardless of the format.
   1619   VRegister vd = v0.V16B();
   1620   VRegister vn = v1.V16B();
   1621   VRegister vm = v2.V16B();
   1622   VRegister vntmp = v3.V16B();
   1623   VRegister vmtmp = v4.V16B();
   1624   VRegister vres = v5.V16B();
   1625 
   1626   // These will have the correct format for calling the 'helper'.
   1627   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1628   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
   1629   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   1630 
   1631   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1632   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1633   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
   1634 
   1635   __ Mov(out, results);
   1636 
   1637   __ Mov(inputs_d_base, inputs_d);
   1638 
   1639   __ Mov(inputs_n_base, inputs_n);
   1640   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
   1641   __ Mov(inputs_m_base, inputs_m);
   1642   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
   1643 
   1644   __ Ldr(vd, MemOperand(inputs_d_base));
   1645   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   1646   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
   1647 
   1648   __ Mov(index_n, 0);
   1649   __ Bind(&loop_n);
   1650 
   1651   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
   1652                                   vn_lane_bytes_log2));
   1653   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   1654 
   1655   __ Mov(index_m, 0);
   1656   __ Bind(&loop_m);
   1657 
   1658   __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
   1659                                   vm_lane_bytes_log2));
   1660   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
   1661 
   1662   __ Mov(vres, vd);
   1663   {
   1664     SingleEmissionCheckScope guard(&masm);
   1665     (masm.*helper)(vres_helper, vn_helper, vm_helper);
   1666   }
   1667   __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
   1668 
   1669   __ Add(index_m, index_m, 1);
   1670   __ Cmp(index_m, inputs_m_length);
   1671   __ B(lo, &loop_m);
   1672 
   1673   __ Add(index_n, index_n, 1);
   1674   __ Cmp(index_n, inputs_n_length);
   1675   __ B(lo, &loop_n);
   1676 
   1677   END();
   1678   RUN();
   1679   TEARDOWN();
   1680 }
   1681 
   1682 
   1683 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1684 // arrays of rawbit representation of input values. This ensures that
   1685 // exact bit comparisons can be performed.
   1686 template <typename Td, typename Tn, typename Tm>
   1687 static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper,
   1688                         const Td inputs_d[],
   1689                         const Tn inputs_n[], unsigned inputs_n_length,
   1690                         const Tm inputs_m[], unsigned inputs_m_length,
   1691                         const Td expected[], unsigned expected_length,
   1692                         VectorFormat vd_form,
   1693                         VectorFormat vn_form,
   1694                         VectorFormat vm_form) {
   1695   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
   1696 
   1697   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
   1698 
   1699   const unsigned results_length = inputs_n_length * inputs_m_length;
   1700   Td* results = new Td[results_length * vd_lane_count];
   1701   const unsigned lane_bit = sizeof(Td) * 8;
   1702   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
   1703 
   1704   Test2OpNEON_Helper(helper,
   1705                      reinterpret_cast<uintptr_t>(inputs_d),
   1706                      reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
   1707                      reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
   1708                      reinterpret_cast<uintptr_t>(results),
   1709                      vd_form, vn_form, vm_form);
   1710 
   1711   if (Test::sim_test_trace()) {
   1712     // Print the results.
   1713     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1714     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1715       printf(" ");
   1716       // Output a separate result for each element of the result vector.
   1717       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1718         unsigned index = lane + (iteration * vd_lane_count);
   1719         printf(" 0x%0*" PRIx64 ",",
   1720                lane_len_in_hex,
   1721                static_cast<uint64_t>(results[index]));
   1722       }
   1723       printf("\n");
   1724     }
   1725 
   1726     printf("};\n");
   1727     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1728            name,
   1729            results_length);
   1730   } else {
   1731     // Check the results.
   1732     VIXL_CHECK(expected_length == results_length);
   1733     unsigned error_count = 0;
   1734     unsigned d = 0;
   1735     const char* padding = "                    ";
   1736     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1737     for (unsigned n = 0; n < inputs_n_length; n++) {
   1738       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
   1739         bool error_in_vector = false;
   1740 
   1741         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1742           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   1743               (m * vd_lane_count) + lane;
   1744 
   1745           if (results[output_index] != expected[output_index]) {
   1746             error_in_vector = true;
   1747             break;
   1748           }
   1749         }
   1750 
   1751         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1752           printf("%s\n", name);
   1753           printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
   1754                  lane_len_in_hex+1, padding,
   1755                  lane_len_in_hex+1, padding,
   1756                  lane_len_in_hex+1, padding,
   1757                  lane_len_in_hex+1, padding);
   1758 
   1759           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1760             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   1761                 (m * vd_lane_count) + lane;
   1762             unsigned input_index_n = (inputs_n_length - vd_lane_count +
   1763                 n + 1 + lane) % inputs_n_length;
   1764             unsigned input_index_m = (inputs_m_length - vd_lane_count +
   1765                 m + 1 + lane) % inputs_m_length;
   1766 
   1767             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
   1768                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   1769                    results[output_index] != expected[output_index] ? '*' : ' ',
   1770                    lane_len_in_hex,
   1771                    static_cast<uint64_t>(inputs_d[lane]),
   1772                    lane_len_in_hex,
   1773                    static_cast<uint64_t>(inputs_n[input_index_n]),
   1774                    lane_len_in_hex,
   1775                    static_cast<uint64_t>(inputs_m[input_index_m]),
   1776                    lane_len_in_hex,
   1777                    static_cast<uint64_t>(results[output_index]),
   1778                    lane_len_in_hex,
   1779                    static_cast<uint64_t>(expected[output_index]));
   1780           }
   1781         }
   1782       }
   1783     }
   1784     VIXL_ASSERT(d == expected_length);
   1785     if (error_count > kErrorReportLimit) {
   1786       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1787     }
   1788     VIXL_CHECK(error_count == 0);
   1789   }
   1790   delete[] results;
   1791 }
   1792 
   1793 
   1794 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
   1795 
   1796 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
   1797                                      uintptr_t inputs_d,
   1798                                      uintptr_t inputs_n,
   1799                                      unsigned inputs_n_length,
   1800                                      uintptr_t inputs_m,
   1801                                      unsigned inputs_m_length,
   1802                                      const int indices[],
   1803                                      unsigned indices_length,
   1804                                      uintptr_t results,
   1805                                      VectorFormat vd_form,
   1806                                      VectorFormat vn_form,
   1807                                      VectorFormat vm_form) {
   1808   VIXL_ASSERT(vd_form != kFormatUndefined);
   1809   VIXL_ASSERT(vn_form != kFormatUndefined);
   1810   VIXL_ASSERT(vm_form != kFormatUndefined);
   1811 
   1812   SETUP();
   1813   START();
   1814 
   1815   // Roll up the loop to keep the code size down.
   1816   Label loop_n, loop_m;
   1817 
   1818   Register out = x0;
   1819   Register inputs_n_base = x1;
   1820   Register inputs_m_base = x2;
   1821   Register inputs_d_base = x3;
   1822   Register inputs_n_last_16bytes = x4;
   1823   Register inputs_m_last_16bytes = x5;
   1824   Register index_n = x6;
   1825   Register index_m = x7;
   1826 
   1827   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1828   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1829   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1830 
   1831   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1832   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1833   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1834   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1835   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1836 
   1837   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
   1838   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
   1839   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
   1840   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
   1841   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
   1842 
   1843 
   1844   // Always load and store 128 bits regardless of the format.
   1845   VRegister vd = v0.V16B();
   1846   VRegister vn = v1.V16B();
   1847   VRegister vm = v2.V16B();
   1848   VRegister vntmp = v3.V16B();
   1849   VRegister vmtmp = v4.V16B();
   1850   VRegister vres = v5.V16B();
   1851 
   1852   // These will have the correct format for calling the 'helper'.
   1853   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1854   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
   1855   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   1856 
   1857   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1858   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1859   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
   1860 
   1861   __ Mov(out, results);
   1862 
   1863   __ Mov(inputs_d_base, inputs_d);
   1864 
   1865   __ Mov(inputs_n_base, inputs_n);
   1866   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
   1867   __ Mov(inputs_m_base, inputs_m);
   1868   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
   1869 
   1870   __ Ldr(vd, MemOperand(inputs_d_base));
   1871   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   1872   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
   1873 
   1874   __ Mov(index_n, 0);
   1875   __ Bind(&loop_n);
   1876 
   1877   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
   1878                                   vn_lane_bytes_log2));
   1879   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   1880 
   1881   __ Mov(index_m, 0);
   1882   __ Bind(&loop_m);
   1883 
   1884   __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
   1885                                   vm_lane_bytes_log2));
   1886   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
   1887 
   1888   __ Mov(vres, vd);
   1889   {
   1890     for (unsigned i = 0; i < indices_length; i++) {
   1891       {
   1892         SingleEmissionCheckScope guard(&masm);
   1893         (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
   1894       }
   1895       __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
   1896     }
   1897   }
   1898 
   1899   __ Add(index_m, index_m, 1);
   1900   __ Cmp(index_m, inputs_m_length);
   1901   __ B(lo, &loop_m);
   1902 
   1903   __ Add(index_n, index_n, 1);
   1904   __ Cmp(index_n, inputs_n_length);
   1905   __ B(lo, &loop_n);
   1906 
   1907   END();
   1908   RUN();
   1909   TEARDOWN();
   1910 }
   1911 
   1912 
   1913 
   1914 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1915 // arrays of rawbit representation of input values. This ensures that
   1916 // exact bit comparisons can be performed.
   1917 template <typename Td, typename Tn, typename Tm>
   1918 static void TestByElementNEON(const char *name,
   1919                               TestByElementNEONHelper_t helper,
   1920                               const Td inputs_d[],
   1921                               const Tn inputs_n[], unsigned inputs_n_length,
   1922                               const Tm inputs_m[], unsigned inputs_m_length,
   1923                               const int indices[], unsigned indices_length,
   1924                               const Td expected[], unsigned expected_length,
   1925                               VectorFormat vd_form,
   1926                               VectorFormat vn_form,
   1927                               VectorFormat vm_form) {
   1928   VIXL_ASSERT(inputs_n_length > 0);
   1929   VIXL_ASSERT(inputs_m_length > 0);
   1930   VIXL_ASSERT(indices_length > 0);
   1931 
   1932   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
   1933 
   1934   const unsigned results_length = inputs_n_length * inputs_m_length *
   1935                                   indices_length;
   1936   Td* results = new Td[results_length * vd_lane_count];
   1937   const unsigned lane_bit = sizeof(Td) * 8;
   1938   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
   1939 
   1940   TestByElementNEON_Helper(helper,
   1941     reinterpret_cast<uintptr_t>(inputs_d),
   1942     reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
   1943     reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
   1944     indices, indices_length,
   1945     reinterpret_cast<uintptr_t>(results),
   1946     vd_form, vn_form, vm_form);
   1947 
   1948   if (Test::sim_test_trace()) {
   1949     // Print the results.
   1950     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1951     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1952       printf(" ");
   1953       // Output a separate result for each element of the result vector.
   1954       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1955         unsigned index = lane + (iteration * vd_lane_count);
   1956         printf(" 0x%0*" PRIx64 ",",
   1957                lane_len_in_hex,
   1958                static_cast<uint64_t>(results[index]));
   1959       }
   1960       printf("\n");
   1961     }
   1962 
   1963     printf("};\n");
   1964     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1965            name,
   1966            results_length);
   1967   } else {
   1968     // Check the results.
   1969     VIXL_CHECK(expected_length == results_length);
   1970     unsigned error_count = 0;
   1971     unsigned d = 0;
   1972     const char* padding = "                    ";
   1973     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1974     for (unsigned n = 0; n < inputs_n_length; n++) {
   1975       for (unsigned m = 0; m < inputs_m_length; m++) {
   1976         for (unsigned index = 0; index < indices_length; index++, d++) {
   1977           bool error_in_vector = false;
   1978 
   1979           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1980             unsigned output_index =
   1981                 (n * inputs_m_length * indices_length * vd_lane_count) +
   1982                 (m * indices_length * vd_lane_count) +
   1983                 (index * vd_lane_count) + lane;
   1984 
   1985             if (results[output_index] != expected[output_index]) {
   1986               error_in_vector = true;
   1987               break;
   1988             }
   1989           }
   1990 
   1991           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1992             printf("%s\n", name);
   1993             printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
   1994                   lane_len_in_hex+1, padding,
   1995                   lane_len_in_hex+1, padding,
   1996                   lane_len_in_hex+1, padding,
   1997                   lane_len_in_hex+1, padding);
   1998 
   1999             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2000               unsigned output_index =
   2001                   (n * inputs_m_length * indices_length * vd_lane_count) +
   2002                   (m * indices_length * vd_lane_count) +
   2003                   (index * vd_lane_count) + lane;
   2004               unsigned input_index_n = (inputs_n_length - vd_lane_count +
   2005                   n + 1 + lane) % inputs_n_length;
   2006               unsigned input_index_m = (inputs_m_length - vd_lane_count +
   2007                   m + 1 + lane) % inputs_m_length;
   2008 
   2009               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
   2010                 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2011                 results[output_index] != expected[output_index] ? '*' : ' ',
   2012                 lane_len_in_hex,
   2013                 static_cast<uint64_t>(inputs_d[lane]),
   2014                 lane_len_in_hex,
   2015                 static_cast<uint64_t>(inputs_n[input_index_n]),
   2016                 lane_len_in_hex,
   2017                 static_cast<uint64_t>(inputs_m[input_index_m]),
   2018                 indices[index],
   2019                 lane_len_in_hex,
   2020                 static_cast<uint64_t>(results[output_index]),
   2021                 lane_len_in_hex,
   2022                 static_cast<uint64_t>(expected[output_index]));
   2023             }
   2024           }
   2025         }
   2026       }
   2027     }
   2028     VIXL_ASSERT(d == expected_length);
   2029     if (error_count > kErrorReportLimit) {
   2030       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2031     }
   2032     VIXL_CHECK(error_count == 0);
   2033   }
   2034   delete[] results;
   2035 }
   2036 
   2037 
   2038 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
   2039 
   2040 
   2041 template <typename Tm>
   2042 void Test2OpImmNEON_Helper(
   2043     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
   2044     uintptr_t inputs_n,
   2045     unsigned inputs_n_length,
   2046     const Tm inputs_m[],
   2047     unsigned inputs_m_length,
   2048     uintptr_t results,
   2049     VectorFormat vd_form,
   2050     VectorFormat vn_form) {
   2051   VIXL_ASSERT(vd_form != kFormatUndefined &&
   2052               vn_form != kFormatUndefined);
   2053 
   2054   SETUP();
   2055   START();
   2056 
   2057   // Roll up the loop to keep the code size down.
   2058   Label loop_n;
   2059 
   2060   Register out = x0;
   2061   Register inputs_n_base = x1;
   2062   Register inputs_n_last_16bytes = x3;
   2063   Register index_n = x5;
   2064 
   2065   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   2066   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   2067   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2068 
   2069   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   2070   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2071   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2072   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   2073   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   2074 
   2075 
   2076   // These will be either a D- or a Q-register form, with a single lane
   2077   // (for use in scalar load and store operations).
   2078   VRegister vd = VRegister(0, vd_bits);
   2079   VRegister vn = v1.V16B();
   2080   VRegister vntmp = v3.V16B();
   2081 
   2082   // These will have the correct format for use when calling 'helper'.
   2083   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
   2084   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   2085 
   2086   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   2087   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   2088 
   2089   __ Mov(out, results);
   2090 
   2091   __ Mov(inputs_n_base, inputs_n);
   2092   __ Mov(inputs_n_last_16bytes,
   2093          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
   2094 
   2095   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   2096 
   2097   __ Mov(index_n, 0);
   2098   __ Bind(&loop_n);
   2099 
   2100   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
   2101                                   vn_lane_bytes_log2));
   2102   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   2103 
   2104   // Set the destination to zero for tests such as '[r]shrn2'.
   2105   // TODO: Setting the destination to values other than zero might be a better
   2106   //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
   2107   __ Movi(vd.V16B(), 0);
   2108 
   2109   {
   2110     for (unsigned i = 0; i < inputs_m_length; i++) {
   2111       {
   2112         SingleEmissionCheckScope guard(&masm);
   2113         (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
   2114       }
   2115       __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
   2116     }
   2117   }
   2118 
   2119   __ Add(index_n, index_n, 1);
   2120   __ Cmp(index_n, inputs_n_length);
   2121   __ B(lo, &loop_n);
   2122 
   2123   END();
   2124   RUN();
   2125   TEARDOWN();
   2126 }
   2127 
   2128 
   2129 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   2130 // arrays of rawbit representation of input values. This ensures that
   2131 // exact bit comparisons can be performed.
   2132 template <typename Td, typename Tn, typename Tm>
   2133 static void Test2OpImmNEON(
   2134     const char * name,
   2135     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
   2136     const Tn inputs_n[], unsigned inputs_n_length,
   2137     const Tm inputs_m[], unsigned inputs_m_length,
   2138     const Td expected[], unsigned expected_length,
   2139     VectorFormat vd_form,
   2140     VectorFormat vn_form) {
   2141   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
   2142 
   2143   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2144   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2145   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2146 
   2147   const unsigned results_length = inputs_n_length * inputs_m_length;
   2148   Td* results = new Td[results_length * vd_lane_count];
   2149   const unsigned lane_bit = sizeof(Td) * 8;
   2150   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   2151 
   2152   Test2OpImmNEON_Helper(helper,
   2153                         reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
   2154                         inputs_m, inputs_m_length,
   2155                         reinterpret_cast<uintptr_t>(results),
   2156                         vd_form, vn_form);
   2157 
   2158   if (Test::sim_test_trace()) {
   2159     // Print the results.
   2160     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2161     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2162       printf(" ");
   2163       // Output a separate result for each element of the result vector.
   2164       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2165         unsigned index = lane + (iteration * vd_lane_count);
   2166         printf(" 0x%0*" PRIx64 ",",
   2167                lane_len_in_hex,
   2168                static_cast<uint64_t>(results[index]));
   2169       }
   2170       printf("\n");
   2171     }
   2172 
   2173     printf("};\n");
   2174     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2175            name,
   2176            results_length);
   2177   } else {
   2178     // Check the results.
   2179     VIXL_CHECK(expected_length == results_length);
   2180     unsigned error_count = 0;
   2181     unsigned d = 0;
   2182     const char* padding = "                    ";
   2183     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2184     for (unsigned n = 0; n < inputs_n_length; n++) {
   2185       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
   2186         bool error_in_vector = false;
   2187 
   2188         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2189           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   2190               (m * vd_lane_count) + lane;
   2191 
   2192           if (results[output_index] != expected[output_index]) {
   2193             error_in_vector = true;
   2194             break;
   2195           }
   2196         }
   2197 
   2198         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2199           printf("%s\n", name);
   2200           printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
   2201                  lane_len_in_hex+1, padding,
   2202                  lane_len_in_hex, padding,
   2203                  lane_len_in_hex+1, padding);
   2204 
   2205         const unsigned first_index_n =
   2206           inputs_n_length - (16 / vn_lane_bytes) + n + 1;
   2207 
   2208         for (unsigned lane = 0;
   2209              lane < std::max(vd_lane_count, vn_lane_count);
   2210              lane++) {
   2211             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   2212                 (m * vd_lane_count) + lane;
   2213             unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
   2214             unsigned input_index_m = m;
   2215 
   2216             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
   2217                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2218                    results[output_index] != expected[output_index] ? '*' : ' ',
   2219                    lane_len_in_hex,
   2220                    static_cast<uint64_t>(inputs_n[input_index_n]),
   2221                    lane_len_in_hex,
   2222                    static_cast<uint64_t>(inputs_m[input_index_m]),
   2223                    lane_len_in_hex,
   2224                    static_cast<uint64_t>(results[output_index]),
   2225                    lane_len_in_hex,
   2226                    static_cast<uint64_t>(expected[output_index]));
   2227           }
   2228         }
   2229       }
   2230     }
   2231     VIXL_ASSERT(d == expected_length);
   2232     if (error_count > kErrorReportLimit) {
   2233       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2234     }
   2235     VIXL_CHECK(error_count == 0);
   2236   }
   2237   delete[] results;
   2238 }
   2239 
   2240 
   2241 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
   2242 
   2243 
   2244 static void TestOpImmOpImmNEON_Helper(
   2245   TestOpImmOpImmVdUpdateNEONHelper_t helper,
   2246   uintptr_t inputs_d,
   2247   const int inputs_imm1[], unsigned inputs_imm1_length,
   2248   uintptr_t inputs_n, unsigned inputs_n_length,
   2249   const int inputs_imm2[], unsigned inputs_imm2_length,
   2250   uintptr_t results,
   2251   VectorFormat vd_form, VectorFormat vn_form) {
   2252   VIXL_ASSERT(vd_form != kFormatUndefined);
   2253   VIXL_ASSERT(vn_form != kFormatUndefined);
   2254 
   2255   SETUP();
   2256   START();
   2257 
   2258   // Roll up the loop to keep the code size down.
   2259   Label loop_n;
   2260 
   2261   Register out = x0;
   2262   Register inputs_d_base = x1;
   2263   Register inputs_n_base = x2;
   2264   Register inputs_n_last_vector = x4;
   2265   Register index_n = x6;
   2266 
   2267   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   2268   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   2269   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2270 
   2271   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   2272   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2273   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2274   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   2275   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   2276 
   2277 
   2278   // These will be either a D- or a Q-register form, with a single lane
   2279   // (for use in scalar load and store operations).
   2280   VRegister vd = VRegister(0, vd_bits);
   2281   VRegister vn = VRegister(1, vn_bits);
   2282   VRegister vntmp = VRegister(4, vn_bits);
   2283   VRegister vres = VRegister(5, vn_bits);
   2284 
   2285   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   2286   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   2287 
   2288   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   2289   VRegister vntmp_single = VRegister(4, vn_lane_bits);
   2290 
   2291   // Same registers for use in the 'ext' instructions.
   2292   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
   2293   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
   2294 
   2295   __ Mov(out, results);
   2296 
   2297   __ Mov(inputs_d_base, inputs_d);
   2298 
   2299   __ Mov(inputs_n_base, inputs_n);
   2300   __ Mov(inputs_n_last_vector,
   2301          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
   2302 
   2303   __ Ldr(vd, MemOperand(inputs_d_base));
   2304 
   2305   __ Ldr(vn, MemOperand(inputs_n_last_vector));
   2306 
   2307   __ Mov(index_n, 0);
   2308   __ Bind(&loop_n);
   2309 
   2310   __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
   2311                                   vn_lane_bytes_log2));
   2312   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
   2313 
   2314   {
   2315     EmissionCheckScope guard(&masm,
   2316         kInstructionSize * inputs_imm1_length * inputs_imm2_length * 3);
   2317     for (unsigned i = 0; i < inputs_imm1_length; i++) {
   2318       for (unsigned j = 0; j < inputs_imm2_length; j++) {
   2319         __ Mov(vres, vd);
   2320         (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
   2321         __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
   2322       }
   2323     }
   2324   }
   2325 
   2326   __ Add(index_n, index_n, 1);
   2327   __ Cmp(index_n, inputs_n_length);
   2328   __ B(lo, &loop_n);
   2329 
   2330   END();
   2331   RUN();
   2332   TEARDOWN();
   2333 }
   2334 
   2335 
   2336 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   2337 // arrays of rawbit representation of input values. This ensures that
   2338 // exact bit comparisons can be performed.
   2339 template <typename Td, typename Tn>
   2340 static void TestOpImmOpImmNEON(const char * name,
   2341                                TestOpImmOpImmVdUpdateNEONHelper_t helper,
   2342                                const Td inputs_d[],
   2343                                const int inputs_imm1[],
   2344                                unsigned inputs_imm1_length,
   2345                                const Tn inputs_n[],
   2346                                unsigned inputs_n_length,
   2347                                const int inputs_imm2[],
   2348                                unsigned inputs_imm2_length,
   2349                                const Td expected[],
   2350                                unsigned expected_length,
   2351                                VectorFormat vd_form,
   2352                                VectorFormat vn_form) {
   2353   VIXL_ASSERT(inputs_n_length > 0);
   2354   VIXL_ASSERT(inputs_imm1_length > 0);
   2355   VIXL_ASSERT(inputs_imm2_length > 0);
   2356 
   2357   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2358 
   2359   const unsigned results_length = inputs_n_length *
   2360       inputs_imm1_length * inputs_imm2_length;
   2361 
   2362   Td* results = new Td[results_length * vd_lane_count];
   2363   const unsigned lane_bit = sizeof(Td) * 8;
   2364   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   2365 
   2366   TestOpImmOpImmNEON_Helper(helper,
   2367                             reinterpret_cast<uintptr_t>(inputs_d),
   2368                             inputs_imm1,
   2369                             inputs_imm1_length,
   2370                             reinterpret_cast<uintptr_t>(inputs_n),
   2371                             inputs_n_length,
   2372                             inputs_imm2,
   2373                             inputs_imm2_length,
   2374                             reinterpret_cast<uintptr_t>(results),
   2375                             vd_form, vn_form);
   2376 
   2377   if (Test::sim_test_trace()) {
   2378     // Print the results.
   2379     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2380     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2381       printf(" ");
   2382       // Output a separate result for each element of the result vector.
   2383       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2384         unsigned index = lane + (iteration * vd_lane_count);
   2385         printf(" 0x%0*" PRIx64 ",",
   2386                lane_len_in_hex,
   2387                static_cast<uint64_t>(results[index]));
   2388       }
   2389       printf("\n");
   2390     }
   2391 
   2392     printf("};\n");
   2393     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2394            name,
   2395            results_length);
   2396   } else {
   2397     // Check the results.
   2398     VIXL_CHECK(expected_length == results_length);
   2399     unsigned error_count = 0;
   2400     unsigned counted_length = 0;
   2401     const char* padding = "                    ";
   2402     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2403     for (unsigned n = 0; n < inputs_n_length; n++) {
   2404       for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
   2405         for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
   2406           bool error_in_vector = false;
   2407 
   2408           counted_length++;
   2409 
   2410           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2411             unsigned output_index =
   2412                 (n * inputs_imm1_length *
   2413                  inputs_imm2_length * vd_lane_count) +
   2414                 (imm1 * inputs_imm2_length * vd_lane_count) +
   2415                 (imm2 * vd_lane_count) + lane;
   2416 
   2417             if (results[output_index] != expected[output_index]) {
   2418               error_in_vector = true;
   2419               break;
   2420             }
   2421           }
   2422 
   2423           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2424             printf("%s\n", name);
   2425             printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
   2426                    lane_len_in_hex+1, padding,
   2427                    lane_len_in_hex, padding,
   2428                    lane_len_in_hex+1, padding,
   2429                    lane_len_in_hex, padding,
   2430                    lane_len_in_hex+1, padding);
   2431 
   2432             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2433               unsigned output_index =
   2434                 (n * inputs_imm1_length *
   2435                  inputs_imm2_length * vd_lane_count) +
   2436                 (imm1 * inputs_imm2_length * vd_lane_count) +
   2437                 (imm2 * vd_lane_count) + lane;
   2438               unsigned input_index_n = (inputs_n_length - vd_lane_count +
   2439                   n + 1 + lane) % inputs_n_length;
   2440               unsigned input_index_imm1 = imm1;
   2441               unsigned input_index_imm2 = imm2;
   2442 
   2443               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
   2444                 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2445                 results[output_index] !=
   2446                   expected[output_index] ? '*' : ' ',
   2447                 lane_len_in_hex,
   2448                 static_cast<uint64_t>(inputs_d[lane]),
   2449                 lane_len_in_hex,
   2450                 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
   2451                 lane_len_in_hex,
   2452                 static_cast<uint64_t>(inputs_n[input_index_n]),
   2453                 lane_len_in_hex,
   2454                 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
   2455                 lane_len_in_hex,
   2456                 static_cast<uint64_t>(results[output_index]),
   2457                 lane_len_in_hex,
   2458                 static_cast<uint64_t>(expected[output_index]));
   2459             }
   2460           }
   2461         }
   2462       }
   2463     }
   2464     VIXL_ASSERT(counted_length == expected_length);
   2465     if (error_count > kErrorReportLimit) {
   2466       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2467     }
   2468     VIXL_CHECK(error_count == 0);
   2469   }
   2470   delete[] results;
   2471 }
   2472 
   2473 
   2474 // ==== Floating-point tests. ====
   2475 
   2476 
   2477 // Standard floating-point test expansion for both double- and single-precision
   2478 // operations.
   //
   // STRINGIFY(s) turns its argument into a string literal. The '#' operator is
   // applied directly, so the argument is stringified exactly as written.
   2479 #define STRINGIFY(s) #s
   2480 
   // Expands to a call to Test<type>() for one instruction variant. The test is
   // named "<mnemonic>_<variant>", exercises &MacroAssembler::<mnemonic>, and
   // receives `input` with its element count plus the reference data
   // kExpected_<mnemonic>_<variant> and kExpectedCount_<mnemonic>_<variant>.
   // `input` must be an array (not a pointer): its length comes from sizeof.
   2481 #define CALL_TEST_FP_HELPER(mnemonic, variant, type, input)         \
   2482     Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),          \
   2483                &MacroAssembler::mnemonic,                           \
   2484                input, sizeof(input) / sizeof(input[0]),             \
   2485                kExpected_##mnemonic##_##variant,                    \
   2486                kExpectedCount_##mnemonic##_##variant)
   2487 
   // Defines TEST(<mnemonic>_d) and TEST(<mnemonic>_s). They run Test<type> on
   // kInputDouble<input> and kInputFloat<input> respectively.
   2488 #define DEFINE_TEST_FP(mnemonic, type, input)                       \
   2489     TEST(mnemonic##_d) {                                            \
   2490       CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);  \
   2491     }                                                               \
   2492     TEST(mnemonic##_s) {                                            \
   2493       CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);   \
   2494     }
   2495 
   2496 // TODO: Test with a newer version of valgrind.
   2497 //
   2498 // Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
   2499 // Therefore this test will be exiting through an ASSERT and thus leaking
   2500 // memory.
   //
   // Three-operand tests (Test3Op), double and single precision, Basic inputs.
   2501 DEFINE_TEST_FP(fmadd, 3Op, Basic)
   2502 DEFINE_TEST_FP(fmsub, 3Op, Basic)
   2503 DEFINE_TEST_FP(fnmadd, 3Op, Basic)
   2504 DEFINE_TEST_FP(fnmsub, 3Op, Basic)
   2505 
   // Two-operand tests (Test2Op), double and single precision, Basic inputs.
   2506 DEFINE_TEST_FP(fadd, 2Op, Basic)
   2507 DEFINE_TEST_FP(fdiv, 2Op, Basic)
   2508 DEFINE_TEST_FP(fmax, 2Op, Basic)
   2509 DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
   2510 DEFINE_TEST_FP(fmin, 2Op, Basic)
   2511 DEFINE_TEST_FP(fminnm, 2Op, Basic)
   2512 DEFINE_TEST_FP(fmul, 2Op, Basic)
   2513 DEFINE_TEST_FP(fsub, 2Op, Basic)
   2514 DEFINE_TEST_FP(fnmul, 2Op, Basic)
   2515 
   // One-operand tests (Test1Op). The frint* tests use the Conversions inputs;
   // the others use the Basic inputs.
   2516 DEFINE_TEST_FP(fabs, 1Op, Basic)
   2517 DEFINE_TEST_FP(fmov, 1Op, Basic)
   2518 DEFINE_TEST_FP(fneg, 1Op, Basic)
   2519 DEFINE_TEST_FP(fsqrt, 1Op, Basic)
   2520 DEFINE_TEST_FP(frinta, 1Op, Conversions)
   2521 DEFINE_TEST_FP(frinti, 1Op, Conversions)
   2522 DEFINE_TEST_FP(frintm, 1Op, Conversions)
   2523 DEFINE_TEST_FP(frintn, 1Op, Conversions)
   2524 DEFINE_TEST_FP(frintp, 1Op, Conversions)
   2525 DEFINE_TEST_FP(frintx, 1Op, Conversions)
   2526 DEFINE_TEST_FP(frintz, 1Op, Conversions)
   2527 
   // fcmp tests. The d/s variants go through TestCmp and the dz/sz variants
   // through TestCmpZero; all of them use the Basic inputs.
   2528 TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
   2529 TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
   2530 TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
   2531 TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }
   2532 
   // fcvt tests (Test1Op). The 'sd' variant is fed the double Conversions
   // inputs and the 'ds' variant the float Conversions inputs.
   2533 TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
   2534 TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
   2535 
   // Defines four tests per mnemonic: <mnemonic>_xd, _xs, _wd and _ws. The
   // 'd' variants use kInputDouble<input> and the 's' variants
   // kInputFloat<input>.
   // NOTE(review): 'x'/'w' presumably select the 64-/32-bit integer destination
   // through the matching kExpected_* tables; confirm against the trace data.
   2536 #define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)                \
   2537     TEST(mnemonic##_xd) {                                           \
   2538       CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \
   2539     }                                                               \
   2540     TEST(mnemonic##_xs) {                                           \
   2541       CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);  \
   2542     }                                                               \
   2543     TEST(mnemonic##_wd) {                                           \
   2544       CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
   2545     }                                                               \
   2546     TEST(mnemonic##_ws) {                                           \
   2547       CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);  \
   2548     }
   2549 
   // FP-to-integer conversion tests on the Conversions inputs. fcvtzs/fcvtzu go
   // through TestFPToFixedS/TestFPToFixedU; the others through
   // TestFPToS/TestFPToU.
   2550 DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
   2551 DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
   2552 DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
   2553 DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
   2554 DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
   2555 DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
   2556 DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
   2557 DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
   2558 
   2559 // TODO: Scvtf-fixed-point
   2560 // TODO: Scvtf-integer
   2561 // TODO: Ucvtf-fixed-point
   2562 // TODO: Ucvtf-integer
   2563 
   2564 // TODO: Fccmp
   2565 // TODO: Fcsel
   2566 
   2567 
   2568 // ==== NEON Tests. ====
   2569 
   // Expands to a Test1OpNEON() call named "<mnemonic>_<vdform>". It passes
   // &MacroAssembler::<mnemonic>, `input_n` with its element count (taken with
   // sizeof, so it must be an array), the kExpected_NEON_<mnemonic>_<vdform>
   // reference data and the kFormat<vdform> / kFormat<vnform> vector formats.
   2570 #define CALL_TEST_NEON_HELPER_1Op(mnemonic,                                  \
   2571                                   vdform, vnform,                            \
   2572                                   input_n)                                   \
   2573     Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),                   \
   2574                 &MacroAssembler::mnemonic,                                   \
   2575                 input_n,                                                     \
   2576                 (sizeof(input_n) / sizeof(input_n[0])),                      \
   2577                 kExpected_NEON_##mnemonic##_##vdform,                        \
   2578                 kExpectedCount_NEON_##mnemonic##_##vdform,                   \
   2579                 kFormat##vdform,                                             \
   2580                 kFormat##vnform)
   2581 
   // Expands to a Test1OpAcrossNEON() call. Unlike the plain 1Op form, both
   // formats appear in the test name ("<mnemonic>_<vdform>_<vnform>") and in the
   // kExpected_NEON_<mnemonic>_<vdform>_<vnform> reference data names.
   2582 #define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic,                            \
   2583                                         vdform, vnform,                      \
   2584                                         input_n)                             \
   2585     Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform)              \
   2586                                           "_" STRINGIFY(vnform),             \
   2587                       &MacroAssembler::mnemonic,                             \
   2588                       input_n,                                               \
   2589                       (sizeof(input_n) / sizeof(input_n[0])),                \
   2590                       kExpected_NEON_##mnemonic##_##vdform##_##vnform,       \
   2591                       kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,  \
   2592                       kFormat##vdform,                                       \
   2593                       kFormat##vnform)
   2594 
   // Expands to a Test2OpNEON() call named "<mnemonic>_<vdform>". `input_d` is
   // passed without a length; `input_n` and `input_m` are passed with element
   // counts taken with sizeof, so both must be arrays. The three vector formats
   // are kFormat<vdform>, kFormat<vnform> and kFormat<vmform>.
   2595 #define CALL_TEST_NEON_HELPER_2Op(mnemonic,                                  \
   2596                                   vdform, vnform, vmform,                    \
   2597                                   input_d, input_n, input_m)                 \
   2598     Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),                   \
   2599                 &MacroAssembler::mnemonic,                                   \
   2600                 input_d,                                                     \
   2601                 input_n,                                                     \
   2602                 (sizeof(input_n) / sizeof(input_n[0])),                      \
   2603                 input_m,                                                     \
   2604                 (sizeof(input_m) / sizeof(input_m[0])),                      \
   2605                 kExpected_NEON_##mnemonic##_##vdform,                        \
   2606                 kExpectedCount_NEON_##mnemonic##_##vdform,                   \
   2607                 kFormat##vdform,                                             \
   2608                 kFormat##vnform,                                             \
   2609                 kFormat##vmform)
   2610 
   // Expands to a Test2OpImmNEON() call. The test name and the reference data
   // names carry a "_2OPIMM" suffix: "<mnemonic>_<vdform>_2OPIMM" and
   // kExpected_NEON_<mnemonic>_<vdform>_2OPIMM. `input_n` and `input_m` must be
   // arrays because their lengths are taken with sizeof.
   2611 #define CALL_TEST_NEON_HELPER_2OpImm(mnemonic,                               \
   2612                                      vdform, vnform,                         \
   2613                                      input_n, input_m)                       \
   2614     Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM",      \
   2615                    &MacroAssembler::mnemonic,                                \
   2616                    input_n,                                                  \
   2617                    (sizeof(input_n) / sizeof(input_n[0])),                   \
   2618                    input_m,                                                  \
   2619                    (sizeof(input_m) / sizeof(input_m[0])),                   \
   2620                    kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,            \
   2621                    kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,       \
   2622                    kFormat##vdform,                                          \
   2623                    kFormat##vnform)
   2624 
   // Expands to a TestByElementNEON() call named
   // "<mnemonic>_<vdform>_<vnform>_<vmform>". `input_n`, `input_m` and `indices`
   // are passed with element counts taken with sizeof, so they must be arrays;
   // `input_d` is passed without a length.
   2625 #define CALL_TEST_NEON_HELPER_ByElement(mnemonic,                            \
   2626                                         vdform, vnform, vmform,              \
   2627                                         input_d, input_n, input_m, indices)  \
   2628     TestByElementNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform)              \
   2629         "_" STRINGIFY(vnform) "_" STRINGIFY(vmform),                         \
   2630         &MacroAssembler::mnemonic,                                           \
   2631         input_d,                                                             \
   2632         input_n,                                                             \
   2633         (sizeof(input_n) / sizeof(input_n[0])),                              \
   2634         input_m,                                                             \
   2635         (sizeof(input_m) / sizeof(input_m[0])),                              \
   2636         indices,                                                             \
   2637         (sizeof(indices) / sizeof(indices[0])),                              \
   2638         kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,          \
   2639         kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,     \
   2640         kFormat##vdform,                                                     \
   2641         kFormat##vnform,                                                     \
   2642         kFormat##vmform)
   2643 
   // Expands to a TestOpImmOpImmNEON() call named "<mnemonic>_<vdform>". Unlike
   // the other CALL_TEST_NEON_HELPER_* macros, the function under test is given
   // explicitly as `helper`; `mnemonic` is only used to build the test name and
   // the kExpected_NEON_* names. `input_imm1`, `input_n` and `input_imm2` must
   // be arrays because their lengths are taken with sizeof.
   2644 #define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                             \
   2645                                          mnemonic,                           \
   2646                                          vdform, vnform,                     \
   2647                                          input_d, input_imm1,                \
   2648                                          input_n, input_imm2)                \
   2649     TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),            \
   2650                        helper,                                               \
   2651                        input_d,                                              \
   2652                        input_imm1,                                           \
   2653                        (sizeof(input_imm1) / sizeof(input_imm1[0])),         \
   2654                        input_n,                                              \
   2655                        (sizeof(input_n) / sizeof(input_n[0])),               \
   2656                        input_imm2,                                           \
   2657                        (sizeof(input_imm2) / sizeof(input_imm2[0])),         \
   2658                        kExpected_NEON_##mnemonic##_##vdform,                 \
   2659                        kExpectedCount_NEON_##mnemonic##_##vdform,            \
   2660                        kFormat##vdform,                                      \
   2661                        kFormat##vnform)
   2662 
   // One-operand NEON test where destination and source share the same format:
   // `variant` is used as both vdform and vnform.
   2663 #define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input)                \
   2664     CALL_TEST_NEON_HELPER_1Op(mnemonic,                                      \
   2665                               variant, variant,                              \
   2666                               input)
   2667 
   // Defines the 8B and 16B same-format tests, both fed from kInput8bits<input>.
   2668 #define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)                       \
   2669     TEST(mnemonic##_8B) {                                                    \
   2670       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);         \
   2671     }                                                                        \
   2672     TEST(mnemonic##_16B) {                                                   \
   2673       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input);        \
   2674     }
   2675 
   // Defines the 4H and 8H same-format tests, both fed from kInput16bits<input>.
   2676 #define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)                        \
   2677     TEST(mnemonic##_4H) {                                                    \
   2678       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input);        \
   2679     }                                                                        \
   2680     TEST(mnemonic##_8H) {                                                    \
   2681       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input);        \
   2682     }
   2683 
   // Defines the 2S and 4S same-format tests, both fed from kInput32bits<input>.
   2684 #define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                        \
   2685     TEST(mnemonic##_2S) {                                                    \
   2686       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input);        \
   2687     }                                                                        \
   2688     TEST(mnemonic##_4S) {                                                    \
   2689       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input);        \
   2690     }
   2691 
   // Byte and halfword formats only: 8B, 16B, 4H and 8H.
   2692 #define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)                           \
   2693     DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)                           \
   2694     DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)
   2695 
   // Every same-format test except 2D: 8B, 16B, 4H, 8H, 2S and 4S.
   2696 #define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                         \
   2697     DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)                               \
   2698     DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)
   2699 
   // All same-format tests: the NO2D set plus a 2D test fed from
   // kInput64bits<input>.
   2700 #define DEFINE_TEST_NEON_2SAME(mnemonic, input)                              \
   2701     DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                             \
   2702     TEST(mnemonic##_2D) {                                                    \
   2703       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);        \
   2704     }
   // Word and doubleword formats only: 2S, 4S and 2D.
   2705 #define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                           \
   2706     DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                            \
   2707     TEST(mnemonic##_2D) {                                                    \
   2708       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);        \
   2709     }
   2710 
   // Floating-point vector tests: 2S and 4S are fed from kInputFloat<input>,
   // 2D from kInputDouble<input>.
   2711 #define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                           \
   2712     TEST(mnemonic##_2S) {                                                    \
   2713       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);         \
   2714     }                                                                        \
   2715     TEST(mnemonic##_4S) {                                                    \
   2716       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);         \
   2717     }                                                                        \
   2718     TEST(mnemonic##_2D) {                                                    \
   2719       CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input);        \
   2720     }
   2721 
   // Floating-point scalar tests: S is fed from kInputFloat<input>, D from
   // kInputDouble<input>.
   2722 #define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input)                    \
   2723     TEST(mnemonic##_S) {                                                     \
   2724       CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);          \
   2725     }                                                                        \
   2726     TEST(mnemonic##_D) {                                                     \
   2727       CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input);         \
   2728     }
   2729 
   2730 #define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)                     \
   2731     TEST(mnemonic##_B) {                                                     \
   2732       CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input);          \
   2733     }
   2734 #define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)                     \
   2735     TEST(mnemonic##_H) {                                                     \
   2736       CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input);         \
   2737     }
   // Defines the test <mnemonic>_S, which invokes CALL_TEST_NEON_HELPER_2SAME
   // with S and kInput32bits<input>.
   #define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
     }
   // Defines the test <mnemonic>_D, which invokes CALL_TEST_NEON_HELPER_2SAME
   // with D and kInput64bits<input>.
   #define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
     }
   2746 
   // Expands, in order, to the B, H, S and D variants of
   // DEFINE_TEST_NEON_2SAME_SCALAR_<size> with the same (mnemonic, input).
   #define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
     DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
     DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
     DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
     DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
   2752 
   // Expands to DEFINE_TEST_NEON_2SAME_SCALAR_S followed by
   // DEFINE_TEST_NEON_2SAME_SCALAR_D, both with the same (mnemonic, input).
   #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
     DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
     DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
   2756 
   2757 
   // Forwards all four arguments, unchanged and in the same order, to
   // CALL_TEST_NEON_HELPER_1OpAcross. The expansion carries no trailing
   // semicolon; the invocation site supplies it.
   #define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
     CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
   2762 
   // Defines five tests named <mnemonic>_<vd>_<vn>: B_8B, B_16B, H_4H, H_8H
   // and S_4S. Each invokes CALL_TEST_NEON_HELPER_ACROSS with that vd/vn form
   // pair and the kInput<8|16|32>bits<input> whose width matches the vn form.
   #define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \
     TEST(mnemonic##_B_8B) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \
     } \
     TEST(mnemonic##_B_16B) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
     } \
     TEST(mnemonic##_H_4H) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
     } \
     TEST(mnemonic##_H_8H) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
     } \
     TEST(mnemonic##_S_4S) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
     }
   2779 
   // Defines five tests named <mnemonic>_<vd>_<vn>: H_8B, H_16B, S_4H, S_8H
   // and D_4S. Each invokes CALL_TEST_NEON_HELPER_ACROSS with that vd/vn form
   // pair and the kInput<8|16|32>bits<input> whose width matches the vn form.
   #define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \
     TEST(mnemonic##_H_8B) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \
     } \
     TEST(mnemonic##_H_16B) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
     } \
     TEST(mnemonic##_S_4H) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
     } \
     TEST(mnemonic##_S_8H) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
     } \
     TEST(mnemonic##_D_4S) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
     }
   2796 
   // Defines the single test <mnemonic>_S_4S, which invokes
   // CALL_TEST_NEON_HELPER_ACROSS with vd form S, vn form 4S and
   // kInputFloat<input>.
   #define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \
     TEST(mnemonic##_S_4S) { \
       CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \
     }
   2801 
   // Forwards all four arguments, unchanged and in the same order, to
   // CALL_TEST_NEON_HELPER_1Op. The expansion carries no trailing semicolon;
   // the invocation site supplies it.
   #define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
     CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
   2808 
   // Defines six tests named after the vd form: _4H, _8H, _2S, _4S, _1D and
   // _2D. Each invokes CALL_TEST_NEON_HELPER_2DIFF with the (vd, vn) pair
   // (4H, 8B), (8H, 16B), (2S, 4H), (4S, 8H), (1D, 2S) or (2D, 4S) and the
   // kInput<8|16|32>bits<input> whose width matches the vn form.
   #define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \
     TEST(mnemonic##_4H) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \
     } \
     TEST(mnemonic##_8H) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
     } \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
     } \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
     } \
     TEST(mnemonic##_1D) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
     } \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
     }
   2828 
   // Defines six tests. <mnemonic>_8B, _4H and _2S invoke
   // CALL_TEST_NEON_HELPER_2DIFF for <mnemonic> with the (vd, vn) pairs
   // (8B, 8H), (4H, 4S) and (2S, 2D). <mnemonic>2_16B, 2_8H and 2_4S do the
   // same for <mnemonic>2 with (16B, 8H), (8H, 4S) and (4S, 2D). The input is
   // the kInput<16|32|64>bits<input> whose width matches the vn form.
   #define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \
     TEST(mnemonic##_8B) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \
     } \
     TEST(mnemonic##_4H) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \
     } \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \
     } \
     TEST(mnemonic##2_16B) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
     } \
     TEST(mnemonic##2_8H) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \
     }
   2848 
   // Defines four tests. <mnemonic>_4S and <mnemonic>_2D invoke
   // CALL_TEST_NEON_HELPER_2DIFF for <mnemonic> with the (vd, vn) pairs
   // (4S, 4H) and (2D, 2S); <mnemonic>2_4S and <mnemonic>2_2D do the same for
   // <mnemonic>2 with (4S, 8H) and (2D, 4S). Tests with an H-sized vn form
   // pass kInputFloat16<input>, the others pass kInputFloat<input>.
   #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \
     } \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
     } \
     TEST(mnemonic##2_2D) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \
     }
   2862 
   // Defines four tests. <mnemonic>_4H and <mnemonic>_2S invoke
   // CALL_TEST_NEON_HELPER_2DIFF for <mnemonic> with the (vd, vn) pairs
   // (4H, 4S) and (2S, 2D); <mnemonic>2_8H and <mnemonic>2_4S do the same for
   // <mnemonic>2 with (8H, 4S) and (4S, 2D). Tests with an S-sized vn form
   // pass kInputFloat<input>, those with a D-sized vn form kInputDouble<input>.
   #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \
     TEST(mnemonic##_4H) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \
     } \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
     } \
     TEST(mnemonic##2_8H) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
     }
   2876 
   // Defines two tests: <mnemonic>_2S invokes CALL_TEST_NEON_HELPER_2DIFF for
   // <mnemonic> with (vd, vn) = (2S, 2D), and <mnemonic>2_4S invokes it for
   // <mnemonic>2 with (4S, 2D). Both pass kInputDouble<input>.
   #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
     }
   2884 
   // Defines the tests <mnemonic>_B, <mnemonic>_H and <mnemonic>_S. They invoke
   // CALL_TEST_NEON_HELPER_2DIFF with the (vd, vn) pairs (B, H), (H, S) and
   // (S, D) and the kInput<16|32|64>bits<input> whose width matches the vn
   // form.
   #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \
     TEST(mnemonic##_B) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
     } \
     TEST(mnemonic##_H) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
     } \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
     }
   2895 
   // Defines the tests <mnemonic>_S and <mnemonic>_D. The S test invokes
   // CALL_TEST_NEON_HELPER_2DIFF with (vd, vn) = (S, 2S) and
   // kInputFloat<input>; the D test uses (D, 2D) and kInputDouble<input>.
   #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \
     } \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
     }
   2903 
   // Invokes CALL_TEST_NEON_HELPER_2Op with `variant` repeated for all three
   // form arguments and `input_nm` repeated for the last two input arguments.
   // The expansion is a brace-enclosed block that already contains the
   // statement's semicolon.
   #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) { \
     CALL_TEST_NEON_HELPER_2Op(mnemonic, \
                               variant, variant, variant, \
                               input_d, input_nm, input_nm); \
     }
   2909 
   // Defines the tests <mnemonic>_8B and <mnemonic>_16B. Both invoke
   // CALL_TEST_NEON_HELPER_3SAME with kInput8bitsAccDestination as input_d and
   // kInput8bits<input> as input_nm. The final closing brace ends the macro:
   // no line continuation follows it, so the definition cannot absorb whatever
   // line comes next.
   #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
     TEST(mnemonic##_8B) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, \
                                   kInput8bitsAccDestination, \
                                   kInput8bits##input); \
     } \
     TEST(mnemonic##_16B) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, \
                                   kInput8bitsAccDestination, \
                                   kInput8bits##input); \
     }
   2921 
   // Defines the tests <mnemonic>_4H, _8H, _2S and _4S. Each invokes
   // CALL_TEST_NEON_HELPER_3SAME for that variant; the H variants pass
   // kInput16bitsAccDestination / kInput16bits<input>, the S variants pass
   // kInput32bitsAccDestination / kInput32bits<input>.
   #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
     TEST(mnemonic##_4H) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, \
                                   kInput16bitsAccDestination, \
                                   kInput16bits##input); \
     } \
     TEST(mnemonic##_8H) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, \
                                   kInput16bitsAccDestination, \
                                   kInput16bits##input); \
     } \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \
                                   kInput32bitsAccDestination, \
                                   kInput32bits##input); \
     } \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \
                                   kInput32bitsAccDestination, \
                                   kInput32bits##input); \
     }
   2943 
   // Expands to DEFINE_TEST_NEON_3SAME_8B_16B followed by
   // DEFINE_TEST_NEON_3SAME_HS, both with the same (mnemonic, input).
   #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
     DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
     DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
   2947 
   // Expands to DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) and then defines
   // the additional test <mnemonic>_2D, which invokes
   // CALL_TEST_NEON_HELPER_3SAME with kInput64bitsAccDestination and
   // kInput64bits<input>.
   #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
     DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \
                                   kInput64bitsAccDestination, \
                                   kInput64bits##input); \
     }
   2955 
   // Defines the tests <mnemonic>_2S, _4S and _2D. Each invokes
   // CALL_TEST_NEON_HELPER_3SAME for that variant; 2S and 4S pass
   // kInputFloatAccDestination / kInputFloat<input>, 2D passes
   // kInputDoubleAccDestination / kInputDouble<input>.
   #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \
                                   kInputFloatAccDestination, \
                                   kInputFloat##input); \
     } \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \
                                   kInputFloatAccDestination, \
                                   kInputFloat##input); \
     } \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \
                                   kInputDoubleAccDestination, \
                                   kInputDouble##input); \
     }
   2972 
   // Defines the test <mnemonic>_D, which invokes CALL_TEST_NEON_HELPER_3SAME
   // with variant D, kInput64bitsAccDestination and kInput64bits<input>.
   #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \
                                   kInput64bitsAccDestination, \
                                   kInput64bits##input); \
     }
   2979 
   // Defines the tests <mnemonic>_H and <mnemonic>_S. The H test invokes
   // CALL_TEST_NEON_HELPER_3SAME with kInput16bitsAccDestination /
   // kInput16bits<input>, the S test with kInput32bitsAccDestination /
   // kInput32bits<input>. The final closing brace ends the macro: no line
   // continuation follows it, so the definition cannot absorb whatever line
   // comes next.
   #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
     TEST(mnemonic##_H) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \
                                   kInput16bitsAccDestination, \
                                   kInput16bits##input); \
     } \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \
                                   kInput32bitsAccDestination, \
                                   kInput32bits##input); \
     }
   2991 
   // Defines the tests <mnemonic>_B, _H, _S and _D. Each invokes
   // CALL_TEST_NEON_HELPER_3SAME for that variant with the
   // kInput<8|16|32|64>bitsAccDestination and kInput<8|16|32|64>bits<input>
   // of the matching width.
   #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
     TEST(mnemonic##_B) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, \
                                   kInput8bitsAccDestination, \
                                   kInput8bits##input); \
     } \
     TEST(mnemonic##_H) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \
                                   kInput16bitsAccDestination, \
                                   kInput16bits##input); \
     } \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \
                                   kInput32bitsAccDestination, \
                                   kInput32bits##input); \
     } \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \
                                   kInput64bitsAccDestination, \
                                   kInput64bits##input); \
     }
   3013 
   // Defines the tests <mnemonic>_S and <mnemonic>_D. The S test invokes
   // CALL_TEST_NEON_HELPER_3SAME with kInputFloatAccDestination /
   // kInputFloat<input>, the D test with kInputDoubleAccDestination /
   // kInputDouble<input>.
   #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \
                                   kInputFloatAccDestination, \
                                   kInputFloat##input); \
     } \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \
                                   kInputDoubleAccDestination, \
                                   kInputDouble##input); \
     }
   3025 
   // Forwards all seven arguments, unchanged and in the same order, to
   // CALL_TEST_NEON_HELPER_2Op. The expansion is a brace-enclosed block that
   // already contains the statement's semicolon.
   #define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                       vdform, vnform, vmform, \
                                       input_d, input_n, input_m) { \
     CALL_TEST_NEON_HELPER_2Op(mnemonic, \
                               vdform, vnform, vmform, \
                               input_d, input_n, input_m); \
     }
   3033 
   // Defines two tests: <mnemonic>_8H invokes CALL_TEST_NEON_HELPER_3DIFF for
   // <mnemonic> with forms (8H, 8B, 8B), and <mnemonic>2_8H invokes it for
   // <mnemonic>2 with forms (8H, 16B, 16B). Both pass
   // kInput16bitsAccDestination as input_d and kInput8bits<input> as both
   // input_n and input_m.
   #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
     TEST(mnemonic##_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \
                                   kInput16bitsAccDestination, \
                                   kInput8bits##input, kInput8bits##input); \
     } \
     TEST(mnemonic##2_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \
                                   kInput16bitsAccDestination, \
                                   kInput8bits##input, kInput8bits##input); \
     }
   3045 
   // Defines two tests: <mnemonic>_4S invokes CALL_TEST_NEON_HELPER_3DIFF for
   // <mnemonic> with forms (4S, 4H, 4H), and <mnemonic>2_4S invokes it for
   // <mnemonic>2 with forms (4S, 8H, 8H). Both pass
   // kInput32bitsAccDestination as input_d and kInput16bits<input> as both
   // input_n and input_m.
   #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
                                   kInput32bitsAccDestination, \
                                   kInput16bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \
                                   kInput32bitsAccDestination, \
                                   kInput16bits##input, kInput16bits##input); \
     }
   3057 
   // Defines two tests: <mnemonic>_2D invokes CALL_TEST_NEON_HELPER_3DIFF for
   // <mnemonic> with forms (2D, 2S, 2S), and <mnemonic>2_2D invokes it for
   // <mnemonic>2 with forms (2D, 4S, 4S). Both pass
   // kInput64bitsAccDestination as input_d and kInput32bits<input> as both
   // input_n and input_m.
   #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \
                                   kInput64bitsAccDestination, \
                                   kInput32bits##input, kInput32bits##input); \
     } \
     TEST(mnemonic##2_2D) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \
                                   kInput64bitsAccDestination, \
                                   kInput32bits##input, kInput32bits##input); \
     }
   3069 
   // Expands to DEFINE_TEST_NEON_3DIFF_LONG_4S followed by
   // DEFINE_TEST_NEON_3DIFF_LONG_2D, both with the same (mnemonic, input).
   #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
   3073 
   // Expands, in order, to DEFINE_TEST_NEON_3DIFF_LONG_8H, _LONG_4S and
   // _LONG_2D with the same (mnemonic, input).
   #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
   3078 
   // Defines the test <mnemonic>_S, which invokes CALL_TEST_NEON_HELPER_3DIFF
   // with forms (S, H, H), kInput32bitsAccDestination as input_d and
   // kInput16bits<input> as both input_n and input_m.
   #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, \
                                   kInput32bitsAccDestination, \
                                   kInput16bits##input, \
                                   kInput16bits##input); \
     }
   3086 
   // Defines the test <mnemonic>_D, which invokes CALL_TEST_NEON_HELPER_3DIFF
   // with forms (D, S, S), kInput64bitsAccDestination as input_d and
   // kInput32bits<input> as both input_n and input_m.
   #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, \
                                   kInput64bitsAccDestination, \
                                   kInput32bits##input, \
                                   kInput32bits##input); \
     }
   3094 
   // Expands to DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S followed by
   // DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D, both with the same
   // (mnemonic, input).
   #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
   3098 
   // Defines six tests. <mnemonic>_8H, _4S and _2D invoke
   // CALL_TEST_NEON_HELPER_3DIFF for <mnemonic> with forms (8H, 8H, 8B),
   // (4S, 4S, 4H) and (2D, 2D, 2S); <mnemonic>2_8H, 2_4S and 2_2D do the same
   // for <mnemonic>2 with (8H, 8H, 16B), (4S, 4S, 8H) and (2D, 2D, 4S).
   // input_d and input_n use the element width of the vd/vn forms, while
   // input_m uses the narrower width of the vm form.
   #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
     TEST(mnemonic##_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \
                                   kInput16bitsAccDestination, \
                                   kInput16bits##input, kInput8bits##input); \
     } \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \
                                   kInput32bitsAccDestination, \
                                   kInput32bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \
                                   kInput64bitsAccDestination, \
                                   kInput64bits##input, kInput32bits##input); \
     } \
     TEST(mnemonic##2_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \
                                   kInput16bitsAccDestination, \
                                   kInput16bits##input, kInput8bits##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \
                                   kInput32bitsAccDestination, \
                                   kInput32bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##2_2D) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \
                                   kInput64bitsAccDestination, \
                                   kInput64bits##input, kInput32bits##input); \
     }
   3130 
   // Defines six tests. <mnemonic>_8B, _4H and _2S invoke
   // CALL_TEST_NEON_HELPER_3DIFF for <mnemonic> with forms (8B, 8H, 8H),
   // (4H, 4S, 4S) and (2S, 2D, 2D); <mnemonic>2_16B, 2_8H and 2_4S do the same
   // for <mnemonic>2 with (16B, 8H, 8H), (8H, 4S, 4S) and (4S, 2D, 2D).
   // input_d uses the element width of the vd form; input_n and input_m both
   // use the wider width of the vn/vm forms.
   #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
     TEST(mnemonic##_8B) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \
                                   kInput8bitsAccDestination, \
                                   kInput16bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##_4H) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \
                                   kInput16bitsAccDestination, \
                                   kInput32bits##input, kInput32bits##input); \
     } \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \
                                   kInput32bitsAccDestination, \
                                   kInput64bits##input, kInput64bits##input); \
     } \
     TEST(mnemonic##2_16B) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \
                                   kInput8bitsAccDestination, \
                                   kInput16bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##2_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \
                                   kInput16bitsAccDestination, \
                                   kInput32bits##input, kInput32bits##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \
                                   kInput32bitsAccDestination, \
                                   kInput64bits##input, kInput64bits##input); \
     }
   3162 
   // Forwards all five arguments, unchanged and in the same order, to
   // CALL_TEST_NEON_HELPER_2OpImm. The expansion is a brace-enclosed block
   // that already contains the statement's semicolon.
   #define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                        vdform, vnform, \
                                        input_n, \
                                        input_imm) { \
     CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \
                                  vdform, vnform, \
                                  input_n, input_imm); \
     }
   3171 
   // Expands to seven TEST definitions, TEST(<mnemonic>_<form>_2OPIMM), for the
   // forms 8B, 16B, 4H, 8H, 2S, 4S and 2D. Each body makes a single
   // CALL_TEST_NEON_HELPER_2OPIMM call that passes the same form as both vdform
   // and vnform. `input` is pasted onto kInput<width>bits and `input_imm` onto
   // kInput<width>bitsImm, where B forms use the 8-bit names, H forms the
   // 16-bit names, S forms the 32-bit names and D forms the 64-bit names.
   #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
     TEST(mnemonic##_8B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8B, 8B, \
           kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_16B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 16B, 16B, \
           kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4H, 4H, \
           kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_8H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8H, 8H, \
           kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, 2S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4S, 4S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2D, 2D, \
           kInput64bits##input, kInput64bitsImm##input_imm); \
     }
   3215 
   // Expands to seven TEST definitions, TEST(<mnemonic>_<vdform>_2OPIMM), whose
   // (vdform, vnform) pairs are (8B, B), (16B, B), (4H, H), (8H, H), (2S, S),
   // (4S, S) and (2D, D). `input` is pasted onto kInput<width>bits and
   // `input_imm` onto kInput<width>bitsImm, with the width (8, 16, 32 or 64)
   // following the B / H / S / D letter of the pair.
   //
   // The TEST identifiers produced here are spelled exactly like those of
   // DEFINE_TEST_NEON_2OPIMM for the same mnemonic.
   #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
     TEST(mnemonic##_8B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8B, B, \
           kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_16B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 16B, B, \
           kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4H, H, \
           kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_8H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8H, H, \
           kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4S, S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2D, D, \
           kInput64bits##input, kInput64bitsImm##input_imm); \
     }
   3259 
   // Expands to six TEST definitions. Three use `mnemonic` with the
   // (vdform, vnform) pairs (8B, 8H), (4H, 4S) and (2S, 2D); the other three
   // use `mnemonic` with a pasted "2" suffix and the pairs (16B, 8H), (8H, 4S)
   // and (4S, 2D). In every case `input` is pasted onto the kInput<width>bits
   // prefix matching vnform (16, 32 or 64), while `input_imm` is pasted onto the
   // kInput<width>bitsImm prefix matching vdform (8, 16 or 32).
   #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
     TEST(mnemonic##_8B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8B, 8H, \
           kInput16bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4H, 4S, \
           kInput32bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, 2D, \
           kInput64bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_16B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 16B, 8H, \
           kInput16bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_8H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 8H, 4S, \
           kInput32bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 4S, 2D, \
           kInput64bits##input, kInput32bitsImm##input_imm); \
     }
   3297 
   // Expands to three TEST definitions, TEST(<mnemonic>_<vdform>_2OPIMM), for
   // the (vdform, vnform) pairs (B, H), (H, S) and (S, D). `input` is pasted
   // onto the kInput<width>bits prefix matching vnform (16, 32 or 64) and
   // `input_imm` onto the kInput<width>bitsImm prefix matching vdform (8, 16 or
   // 32).
   #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
     TEST(mnemonic##_B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, B, H, \
           kInput16bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, H, S, \
           kInput32bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, S, D, \
           kInput64bits##input, kInput32bitsImm##input_imm); \
     }
   3317 
   // Expands to three TEST definitions, TEST(<mnemonic>_<form>_2OPIMM), for the
   // forms 2S, 4S and 2D (same form for vdform and vnform). All three pass
   // kInputDoubleImm<input_imm> as the immediate list. The 4S case reads
   // kInputFloat<input> and the 2D case reads kInputDouble<input>.
   //
   // NOTE(review): the 2S case pastes the literal token `Basic`, so it always
   // reads kInputFloatBasic and ignores `input`. This looks unintentional next
   // to the 4S case, but it is preserved here because changing the input list
   // may invalidate reference results recorded elsewhere - confirm before
   // altering.
   #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, 2S, kInputFloat##Basic, kInputDoubleImm##input_imm) \
     } \
     TEST(mnemonic##_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4S, 4S, kInputFloat##input, kInputDoubleImm##input_imm); \
     } \
     TEST(mnemonic##_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2D, 2D, kInputDouble##input, kInputDoubleImm##input_imm); \
     }
   3340 
   // Expands to three TEST definitions, TEST(<mnemonic>_<form>_2OPIMM), for the
   // forms 2S, 4S and 2D (same form for vdform and vnform). The 2S and 4S cases
   // take their immediates from kInput32bitsImm<input_imm>; the 2D case takes
   // them from kInput64bitsImm<input_imm>. The 4S case reads kInputFloat<input>
   // and the 2D case reads kInputDouble<input>.
   //
   // NOTE(review): the 2S case pastes the literal token `Basic`, so it always
   // reads kInputFloatBasic and ignores `input`. Preserved as-is because
   // reference results recorded elsewhere may depend on it - confirm before
   // altering.
   #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, 2S, kInputFloat##Basic, kInput32bitsImm##input_imm) \
     } \
     TEST(mnemonic##_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4S, 4S, kInputFloat##input, kInput32bitsImm##input_imm) \
     } \
     TEST(mnemonic##_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2D, 2D, kInputDouble##input, kInput64bitsImm##input_imm) \
     }
   3363 
   // Expands to two TEST definitions, TEST(<mnemonic>_S_2OPIMM) and
   // TEST(<mnemonic>_D_2OPIMM), each passing the same form for vdform and
   // vnform. The S case uses kInput32bitsImm<input_imm>; the D case uses
   // kInputDouble<input> together with kInput64bitsImm<input_imm>.
   //
   // NOTE(review): the S case pastes the literal token `Basic`, so it always
   // reads kInputFloatBasic and ignores `input`. Preserved as-is because
   // reference results recorded elsewhere may depend on it - confirm before
   // altering.
   #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
     TEST(mnemonic##_S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, S, S, kInputFloat##Basic, kInput32bitsImm##input_imm) \
     } \
     TEST(mnemonic##_D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, D, D, kInputDouble##input, kInput64bitsImm##input_imm) \
     }
   3379 
   // Expands to three TEST definitions, TEST(<mnemonic>_<form>_2OPIMM), for the
   // forms 2S, 4S and 2D (same form for vdform and vnform). The S forms use
   // kInput32bits<input> with kInput32bitsImm<input_imm>; the D form uses
   // kInput64bits<input> with kInput64bitsImm<input_imm>.
   #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, 2S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4S, 4S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2D, 2D, \
           kInput64bits##input, kInput64bitsImm##input_imm); \
     }
   3399 
   // Expands to a single TEST definition, TEST(<mnemonic>_D_2OPIMM), that calls
   // CALL_TEST_NEON_HELPER_2OPIMM with form D for both vdform and vnform,
   // kInput64bits<input> as the input list and kInput64bitsImm<input_imm> as
   // the immediate list.
   #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
     TEST(mnemonic##_D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, D, D, \
           kInput64bits##input, kInput64bitsImm##input_imm); \
     }
   3407 
   // Expands to TEST(<mnemonic>_S_2OPIMM), which uses form S for both vdform
   // and vnform with kInput32bits<input> and kInput32bitsImm<input_imm>, and
   // then to DEFINE_TEST_NEON_2OPIMM_SCALAR_D with the same three arguments.
   #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
     TEST(mnemonic##_S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, S, S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
   3416 
   // Expands to a single TEST definition, TEST(<mnemonic>_D_2OPIMM), that calls
   // CALL_TEST_NEON_HELPER_2OPIMM with form D for both vdform and vnform,
   // kInputDouble<input> as the input list and kInputDoubleImm<input_imm> as
   // the immediate list.
   #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
     TEST(mnemonic##_D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, D, D, \
           kInputDouble##input, kInputDoubleImm##input_imm); \
     }
   3424 
   // Expands to TEST(<mnemonic>_S_2OPIMM), which uses form S for both vdform
   // and vnform with kInputFloat<input> and kInputDoubleImm<input_imm>, and
   // then to DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D with the same three arguments.
   #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
     TEST(mnemonic##_S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, S, S, \
           kInputFloat##input, kInputDoubleImm##input_imm); \
     } \
     DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
   3433 
   // Expands to TEST(<mnemonic>_B_2OPIMM) and TEST(<mnemonic>_H_2OPIMM), using
   // the 8-bit and 16-bit kInput<width>bits / kInput<width>bitsImm lists
   // respectively with the same form for vdform and vnform, and then to
   // DEFINE_TEST_NEON_2OPIMM_SCALAR_SD with the same three arguments.
   #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
     TEST(mnemonic##_B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, B, B, \
           kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, H, H, \
           kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
   3448 
   // Expands to six TEST definitions. Three use `mnemonic` with the
   // (vdform, vnform) pairs (8H, 8B), (4S, 4H) and (2D, 2S); the other three
   // use `mnemonic` with a pasted "2" suffix and the pairs (8H, 16B), (4S, 8H)
   // and (2D, 4S). Both `input` and `input_imm` are pasted onto the
   // kInput<width>bits / kInput<width>bitsImm prefix matching vnform (8, 16 or
   // 32).
   #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
     TEST(mnemonic##_8H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8H, 8B, \
           kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4S, 4H, \
           kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2D, 2S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_8H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 8H, 16B, \
           kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 4S, 8H, \
           kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 2D, 4S, \
           kInput32bits##input, kInput32bitsImm##input_imm); \
     }
   3486 
   // Forwards its eight arguments, unchanged and in the same order, to
   // CALL_TEST_NEON_HELPER_ByElement. The expansion is enclosed in its own pair
   // of braces.
   #define CALL_TEST_NEON_HELPER_BYELEMENT( \
       mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
     { \
       CALL_TEST_NEON_HELPER_ByElement( \
           mnemonic, vdform, vnform, vmform, \
           input_d, input_n, input_m, indices); \
     }
   3496 
   // Expands to four TEST definitions, TEST(<mnemonic>_<vd>_<vn>_<vm>), for the
   // (vdform, vnform, vmform) triples (4H, 4H, H), (8H, 8H, H), (2S, 2S, S) and
   // (4S, 4S, S). `input_d`, `input_n` and `input_m` are each pasted onto
   // kInput16bits for the H triples and kInput32bits for the S triples. The H
   // triples pass kInputHIndices as the index list, the S triples
   // kInputSIndices.
   #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
     TEST(mnemonic##_4H_4H_H) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, 4H, 4H, H, \
           kInput16bits##input_d, kInput16bits##input_n, \
           kInput16bits##input_m, kInputHIndices); \
     } \
     TEST(mnemonic##_8H_8H_H) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, 8H, 8H, H, \
           kInput16bits##input_d, kInput16bits##input_n, \
           kInput16bits##input_m, kInputHIndices); \
     } \
     TEST(mnemonic##_2S_2S_S) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, 2S, 2S, S, \
           kInput32bits##input_d, kInput32bits##input_n, \
           kInput32bits##input_m, kInputSIndices); \
     } \
     TEST(mnemonic##_4S_4S_S) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, 4S, 4S, S, \
           kInput32bits##input_d, kInput32bits##input_n, \
           kInput32bits##input_m, kInputSIndices); \
     }
   3530 
   // Expands to two TEST definitions, TEST(<mnemonic>_H_H_H) and
   // TEST(<mnemonic>_S_S_S). The H case pastes `input_d`, `input_n` and
   // `input_m` onto kInput16bits and passes kInputHIndices; the S case pastes
   // them onto kInput32bits and passes kInputSIndices.
   #define DEFINE_TEST_NEON_BYELEMENT_SCALAR( \
       mnemonic, input_d, input_n, input_m) \
     TEST(mnemonic##_H_H_H) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, H, H, H, \
           kInput16bits##input_d, kInput16bits##input_n, \
           kInput16bits##input_m, kInputHIndices); \
     } \
     TEST(mnemonic##_S_S_S) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, S, S, S, \
           kInput32bits##input_d, kInput32bits##input_n, \
           kInput32bits##input_m, kInputSIndices); \
     }
   3549 
   // Expands to three TEST definitions, TEST(<mnemonic>_<vd>_<vn>_<vm>), for the
   // (vdform, vnform, vmform) triples (2S, 2S, S), (4S, 4S, S) and (2D, 2D, D).
   // `input_d`, `input_n` and `input_m` are each pasted onto kInputFloat for
   // the S triples and kInputDouble for the D triple. The S triples pass
   // kInputSIndices as the index list, the D triple kInputDIndices.
   //
   // The closing line carries no line-continuation backslash, so the
   // definition ends at the final brace and does not depend on the following
   // source line being empty.
   #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
     TEST(mnemonic##_2S_2S_S) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, 2S, 2S, S, \
           kInputFloat##input_d, kInputFloat##input_n, \
           kInputFloat##input_m, kInputSIndices); \
     } \
     TEST(mnemonic##_4S_4S_S) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, 4S, 4S, S, \
           kInputFloat##input_d, kInputFloat##input_n, \
           kInputFloat##input_m, kInputSIndices); \
     } \
     TEST(mnemonic##_2D_2D_D) { \
       CALL_TEST_NEON_HELPER_BYELEMENT( \
           mnemonic, 2D, 2D, D, \
           kInputDouble##input_d, kInputDouble##input_n, \
           kInputDouble##input_m, kInputDIndices); \
     }
   3575 
   3576 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m)  \
   3577     TEST(mnemonic##_S_S_S) {                                                 \
   3578       CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3579                                       S, S, S,                               \
   3580                                       kInputFloat##inp_d,                    \
   3581                                       kInputFloat##inp_n,                    \
   3582                                       kInputFloat##inp_m,                    \
   3583                                       kInputSIndices);                       \
   3584     }                                                                        \
   3585     TEST(mnemonic##_D_D_D) {                                                 \
   3586       CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3587                                       D, D, D,                               \
   3588                                       kInputDouble##inp_d,                   \
   3589                                       kInputDouble##inp_n,                   \
   3590                                       kInputDouble##inp_m,                   \
   3591                                       kInputDIndices);                       \
   3592     }                                                                        \
   3593 
   3594 
   3595 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
   3596     TEST(mnemonic##_4S_4H_H) {                                               \
   3597       CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3598                                       4S, 4H, H,                             \
   3599                                       kInput32bits##input_d,                 \
   3600                                       kInput16bits##input_n,                 \
   3601                                       kInput16bits##input_m,                 \
   3602                                       kInputHIndices);                       \
   3603     }                                                                        \
   3604     TEST(mnemonic##2_4S_8H_H) {                                              \
   3605       CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                           \
   3606                                       4S, 8H, H,                             \
   3607                                       kInput32bits##input_d,                 \
   3608                                       kInput16bits##input_n,                 \
   3609                                       kInput16bits##input_m,                 \
   3610                                       kInputHIndices);                       \
   3611     }                                                                        \
   3612     TEST(mnemonic##_2D_2S_S) {                                               \
   3613       CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3614                                       2D, 2S, S,                             \
   3615                                       kInput64bits##input_d,                 \
   3616                                       kInput32bits##input_n,                 \
   3617                                       kInput32bits##input_m,                 \
   3618                                       kInputSIndices);                       \
   3619     }                                                                        \
   3620     TEST(mnemonic##2_2D_4S_S) {                                              \
   3621       CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                           \
   3622                                       2D, 4S, S,                             \
   3623                                       kInput64bits##input_d,                 \
   3624                                       kInput32bits##input_n,                 \
   3625                                       kInput32bits##input_m,                 \
   3626                                       kInputSIndices);                       \
   3627     }
   3628 
   3629 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic,                     \
   3630                                                input_d, input_n, input_m)    \
   3631     TEST(mnemonic##_S_H_H) {                                                 \
   3632       CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3633                                       S, H, H,                               \
   3634                                       kInput32bits##input_d,                 \
   3635                                       kInput16bits##input_n,                 \
   3636                                       kInput16bits##input_m,                 \
   3637                                       kInputHIndices);                       \
   3638     }                                                                        \
   3639     TEST(mnemonic##_D_S_S) {                                                 \
   3640       CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3641                                       D, S, S,                               \
   3642                                       kInput64bits##input_d,                 \
   3643                                       kInput32bits##input_n,                 \
   3644                                       kInput32bits##input_m,                 \
   3645                                       kInputSIndices);                       \
   3646     }
   3647 
   3648 
   3649 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                              \
   3650                                       variant,                               \
   3651                                       input_d,                               \
   3652                                       input_imm1,                            \
   3653                                       input_n,                               \
   3654                                       input_imm2) {                          \
   3655     CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic,              \
   3656                                      mnemonic,                               \
   3657                                      variant, variant,                       \
   3658                                      input_d, input_imm1,                    \
   3659                                      input_n, input_imm2);                   \
   3660     }
   3661 
   3662 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic,                                   \
   3663                                  input_d, input_imm1,                        \
   3664                                  input_n, input_imm2)                        \
   3665     TEST(mnemonic##_B) {                                                     \
   3666       CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                                \
   3667                                     16B,                                     \
   3668                                     kInput8bits##input_d,                    \
   3669                                     kInput8bitsImm##input_imm1,              \
   3670                                     kInput8bits##input_n,                    \
   3671                                     kInput8bitsImm##input_imm2);             \
   3672     }                                                                        \
   3673     TEST(mnemonic##_H) {                                                     \
   3674       CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                                \
   3675                                     8H,                                      \
   3676                                     kInput16bits##input_d,                   \
   3677                                     kInput16bitsImm##input_imm1,             \
   3678                                     kInput16bits##input_n,                   \
   3679                                     kInput16bitsImm##input_imm2);            \
   3680     }                                                                        \
   3681     TEST(mnemonic##_S) {                                                     \
   3682       CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                                \
   3683                                     4S,                                      \
   3684                                     kInput32bits##input_d,                   \
   3685                                     kInput32bitsImm##input_imm1,             \
   3686                                     kInput32bits##input_n,                   \
   3687                                     kInput32bitsImm##input_imm2);            \
   3688     }                                                                        \
   3689     TEST(mnemonic##_D) {                                                     \
   3690       CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                                \
   3691                                     2D,                                      \
   3692                                     kInput64bits##input_d,                   \
   3693                                     kInput64bitsImm##input_imm1,             \
   3694                                     kInput64bits##input_n,                   \
   3695                                     kInput64bitsImm##input_imm2);            \
   3696     }
   3697 
   3698 
   3699 // Advanced SIMD copy.
        // DEFINE_TEST_NEON_2OP2IMM expands to ins_B, ins_H, ins_S and ins_D; each
        // combines the Basic inputs with the LaneCountFromZero immediates for
        // both the d and the n operand.
   3700 DEFINE_TEST_NEON_2OP2IMM(ins,
   3701                          Basic, LaneCountFromZero,
   3702                          Basic, LaneCountFromZero)
   3703 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
   3704 
   3705 
   3706 // Advanced SIMD scalar copy.
        // Same dup mnemonic and input sets as the vector copy test, but routed
        // through the scalar 2OPIMM test macro.
   3707 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
   3708 
   3709 
   3710 // Advanced SIMD three same.
        // One line per instruction, all using the Basic input sets. The macro
        // suffix (none, _NO2D, _HS, _FP, _8B_16B) presumably selects which vector
        // arrangements get a test; the macros are defined outside this part of
        // the file.
   3711 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
   3712 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
   3713 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
   3714 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
   3715 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
   3716 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
   3717 DEFINE_TEST_NEON_3SAME(cmge, Basic)
   3718 DEFINE_TEST_NEON_3SAME(sshl, Basic)
   3719 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
   3720 DEFINE_TEST_NEON_3SAME(srshl, Basic)
   3721 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
   3722 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
   3723 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
   3724 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
   3725 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
   3726 DEFINE_TEST_NEON_3SAME(add, Basic)
   3727 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
   3728 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
   3729 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
   3730 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
   3731 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
   3732 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
   3733 DEFINE_TEST_NEON_3SAME(addp, Basic)
   3734 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
   3735 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
   3736 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
   3737 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
   3738 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
   3739 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
   3740 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
   3741 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
   3742 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
   3743 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
   3744 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
   3745 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
   3746 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
   3747 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
   3748 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
   3749 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
   3750 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
   3751 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
   3752 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
   3753 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
   3754 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
   3755 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
   3756 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
   3757 DEFINE_TEST_NEON_3SAME(ushl, Basic)
   3758 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
   3759 DEFINE_TEST_NEON_3SAME(urshl, Basic)
   3760 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
   3761 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
   3762 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
   3763 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
   3764 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
   3765 DEFINE_TEST_NEON_3SAME(sub, Basic)
   3766 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
   3767 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
   3768 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
   3769 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
   3770 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
   3771 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
   3772 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
   3773 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
   3774 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
   3775 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
   3776 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
   3777 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
   3778 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
   3779 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
   3780 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
   3781 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
   3782 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
   3783 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
   3784 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
   3785 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
   3786 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
   3787 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
   3788 
   3789 
   3790 // Advanced SIMD scalar three same.
        // One line per instruction, all using the Basic input sets. The macro
        // suffix (none, _D, _HS, _FP) presumably selects which scalar sizes get a
        // test; the macros are defined outside this part of the file.
   3791 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
   3792 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
   3793 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
   3794 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
   3795 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
   3796 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
   3797 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
   3798 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
   3799 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
   3800 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
   3801 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
   3802 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
   3803 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
   3804 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
   3805 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
        // NOTE(review): sqadd/sqsub above use DEFINE_TEST_NEON_3SAME_SCALAR, while
        // uqadd/uqsub use the _SCALAR_D variant. Confirm the narrower macro is
        // intentional before changing it; a wider macro presumably needs
        // matching expected-trace data.
   3806 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
   3807 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
   3808 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
   3809 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
   3810 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
   3811 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
   3812 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
   3813 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
   3814 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
   3815 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
   3816 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
   3817 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
   3818 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
   3819 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
   3820 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
   3821 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
   3822 
   3823 
   3824 // Advanced SIMD three different.
        // Each instruction picks a _LONG, _WIDE or _NARROW test macro (or the
        // _LONG_SD / _LONG_8H subset). Presumably the suffix mirrors the operand
        // shape of the instruction; TODO confirm against the macro definitions,
        // which are outside this part of the file.
   3825 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
   3826 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
   3827 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
   3828 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
   3829 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
   3830 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
   3831 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
   3832 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
   3833 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
   3834 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
   3835 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
   3836 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
   3837 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
   3838 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
   3839 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
   3840 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
   3841 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
   3842 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
   3843 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
   3844 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
   3845 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
   3846 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
   3847 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
   3848 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
   3849 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
   3850 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
   3851 
   3852 
   3853 // Advanced SIMD scalar three different.
        // The same three mnemonics appear in the vector three-different list,
        // where they use DEFINE_TEST_NEON_3DIFF_LONG_SD.
   3854 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
   3855 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
   3856 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
   3857 
   3858 
   3859 // Advanced SIMD scalar pairwise.
        // addp is written out by hand as a single (D, 2D) case on the 64-bit
        // Basic inputs; the floating-point pairwise instructions go through the
        // DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD macro instead.
   3860 TEST(addp_SCALAR) {
   3861   CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
   3862 }
   3863 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
   3864 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
   3865 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
   3866 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
   3867 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
   3868 
   3869 
   3870 // Advanced SIMD shift by immediate.
        // The last argument names the immediate input set (TypeWidth,
        // TypeWidthFromZero or TypeWidthFromZeroToWidth). Presumably these are
        // the shift / fbits ranges each instruction accepts; TODO confirm against
        // the kInput*Imm* definitions.
   3871 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
   3872 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
   3873 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
   3874 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
   3875 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
   3876 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
   3877 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
   3878 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
   3879 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
   3880 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
   3881 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
   3882 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, \
   3883                            TypeWidthFromZeroToWidth)
   3884 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
   3885 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
   3886 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
   3887 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
   3888 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
   3889 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
   3890 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
   3891 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
   3892 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
   3893 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
   3894 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
   3895 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
   3896 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
   3897 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
   3898 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, \
   3899                            TypeWidthFromZeroToWidth)
   3900 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
   3901 
   3902 
   3903 // Advanced SIMD scalar shift by immediate.
        // Uses the same mnemonics and immediate input sets as the vector
        // shift-by-immediate list, routed through the *_SCALAR* test macros.
   3904 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
   3905 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
   3906 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
   3907 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
   3908 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
   3909 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
   3910 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
   3911 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
   3912 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, \
   3913                                   TypeWidthFromZeroToWidth)
   3914 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
   3915 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
   3916 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
   3917 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
   3918 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
   3919 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
   3920 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
   3921 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
   3922 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
   3923 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
   3924 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
   3925 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
   3926 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
   3927 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, \
   3928                                   TypeWidthFromZeroToWidth)
   3929 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
   3930 
   3931 
   3932 // Advanced SIMD two-register miscellaneous.
        // Conversion and rounding instructions use the Conversions input sets;
        // everything else uses Basic. Compare-against-zero forms pass the Zero
        // immediate input set.
   3933 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
   3934 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
   3935 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
   3936 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
   3937 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
   3938 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
   3939 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
   3940 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
   3941 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
   3942 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
   3943 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
   3944 DEFINE_TEST_NEON_2SAME(abs, Basic)
   3945 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
   3946 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
   3947 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
   3948 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
   3949 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
   3950 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
   3951 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
   3952 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
   3953 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
   3954 // SCVTF (vector, integer): covered by the fixed-point SCVTF tests (fbits 0).
   3955 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
   3956 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
   3957 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
   3958 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
   3959 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
   3960 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
   3961 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
   3962 // FCVTZS (vector, integer): covered by the fixed-point FCVTZS tests (fbits 0).
   3963 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
   3964 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
   3965 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
   3966 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
   3967 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
   3968 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
   3969 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
   3970 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
   3971 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
   3972 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
   3973 DEFINE_TEST_NEON_2SAME(neg, Basic)
   3974 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
   3975 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
   3976 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
   3977 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
   3978 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
   3979 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
   3980 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
   3981 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
   3982 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
   3983 // UCVTF (vector, integer): covered by the fixed-point UCVTF tests (fbits 0).
   3984 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
   3985 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
   3986 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
   3987 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
   3988 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
   3989 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
   3990 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
   3991 // FCVTZU (vector, integer): covered by the fixed-point FCVTZU tests (fbits 0).
   3992 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
   3993 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
   3994 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
   3995 
   3996 
   3997 // Advanced SIMD scalar two-register miscellaneous.
        // Conversion instructions use the Conversions input sets; everything else
        // uses Basic. Compare-against-zero forms pass the Zero immediate input
        // set.
   3998 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
   3999 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
   4000 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
   4001 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
   4002 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
   4003 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
   4004 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
   4005 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
   4006 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
   4007 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
   4008 // SCVTF (vector, integer): covered by the fixed-point SCVTF tests (fbits 0).
   4009 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
   4010 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
   4011 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
   4012 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
   4013 // FCVTZS (vector, integer): covered by the fixed-point FCVTZS tests (fbits 0).
   4014 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
   4015 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
   4016 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
   4017 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
   4018 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
   4019 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
   4020 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
   4021 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
   4022 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
        // fcvtxn is written out by hand as a single (S, D) case on the double
        // Conversions inputs.
   4023 TEST(fcvtxn_SCALAR) {
   4024   CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
   4025 }
   4026 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
   4027 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
   4028 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
   4029 // UCVTF (vector, integer): covered by the fixed-point UCVTF tests (fbits 0).
   4030 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
   4031 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
   4032 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
   4033 // FCVTZU (vector, integer): covered by the fixed-point FCVTZU tests (fbits 0).
   4034 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
   4035 
   4036 
   4037 // Advanced SIMD across lanes.
        // saddlv/uaddlv use the _LONG macro and the f* mnemonics use the _FP
        // macro; the remaining instructions share DEFINE_TEST_NEON_ACROSS.
   4038 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
   4039 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
   4040 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
   4041 DEFINE_TEST_NEON_ACROSS(addv, Basic)
   4042 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
   4043 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
   4044 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
   4045 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
   4046 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
   4047 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
   4048 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
   4049 
   4050 
   4051 // Advanced SIMD permute.
        // All six permutes reuse DEFINE_TEST_NEON_3SAME, the same macro used by
        // the three-same list.
   4052 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
   4053 DEFINE_TEST_NEON_3SAME(trn1, Basic)
   4054 DEFINE_TEST_NEON_3SAME(zip1, Basic)
   4055 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
   4056 DEFINE_TEST_NEON_3SAME(trn2, Basic)
   4057 DEFINE_TEST_NEON_3SAME(zip2, Basic)
   4058 
   4059 
   4060 // Advanced SIMD vector x indexed element.
        // The three trailing arguments are the input-set suffixes for the d, n
        // and m operands. The _DIFF macro also defines tests for the mnemonic##2
        // form of each instruction; the _FP macro uses the float/double inputs.
   4061 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
   4062 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
   4063 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
   4064 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
   4065 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
   4066 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
   4067 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
   4068 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
   4069 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
   4070 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
   4071 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
   4072 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
   4073 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
   4074 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
   4075 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
   4076 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
   4077 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
   4078 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
   4079 
   4080 
   4081 // Advanced SIMD scalar x indexed element.
        // The three trailing arguments are the input-set suffixes for the d, n
        // and m operands. The _DIFF_SCALAR macro defines the S_H_H and D_S_S
        // tests; the _FP macro defines the S_S_S and D_D_D tests.
   4082 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
   4083 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
   4084 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
   4085 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
   4086 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
   4087 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
   4088 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
   4089 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
   4090 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
   4091 
   4092 }  // namespace vixl
   4093