Home | History | Annotate | Download | only in aarch64
      1 // Copyright 2015, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #include <cfloat>
     28 #include <cstdio>
     29 
     30 #include <sstream>
     31 
     32 #include "test-runner.h"
     33 #include "test-utils.h"
     34 
     35 #include "aarch64/test-simulator-inputs-aarch64.h"
     36 #include "aarch64/test-simulator-traces-aarch64.h"
     37 #include "aarch64/test-utils-aarch64.h"
     38 
     39 #include "aarch64/cpu-features-auditor-aarch64.h"
     40 #include "aarch64/macro-assembler-aarch64.h"
     41 #include "aarch64/simulator-aarch64.h"
     42 
     43 namespace vixl {
     44 namespace aarch64 {
     45 
     46 // ==== Simulator Tests ====
     47 //
     48 // These simulator tests check instruction behaviour against a trace taken from
     49 // real AArch64 hardware. The same test code is used to generate the trace; the
     50 // results are printed to stdout when the test is run with
     51 // --generate_test_trace.
     52 //
     53 // The input lists and expected results are stored in test/traces. The expected
     54 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
     55 // test for a new instruction is described at the top of
     56 // test-simulator-traces-aarch64.h.
     57 
     58 #define __ masm.
     59 #define TEST(name) TEST_(AARCH64_SIM_##name)
     60 
     61 #define SETUP() SETUP_WITH_FEATURES(CPUFeatures())
     62 
     63 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     64 
     65 #define SETUP_WITH_FEATURES(...)                      \
     66   MacroAssembler masm;                                \
     67   masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__));      \
     68   Decoder decoder;                                    \
     69   Simulator simulator(&decoder);                      \
     70   simulator.SetColouredTrace(Test::coloured_trace()); \
     71   simulator.SetInstructionStats(Test::instruction_stats());
     72 
     73 #define START()                         \
     74   masm.Reset();                         \
     75   simulator.ResetState();               \
     76   __ PushCalleeSavedRegisters();        \
     77   if (Test::trace_reg()) {              \
     78     __ Trace(LOG_STATE, TRACE_ENABLE);  \
     79   }                                     \
     80   if (Test::trace_write()) {            \
     81     __ Trace(LOG_WRITE, TRACE_ENABLE);  \
     82   }                                     \
     83   if (Test::trace_sim()) {              \
     84     __ Trace(LOG_DISASM, TRACE_ENABLE); \
     85   }                                     \
     86   if (Test::instruction_stats()) {      \
     87     __ EnableInstrumentation();         \
     88   }
     89 
     90 #define END()                       \
     91   if (Test::instruction_stats()) {  \
     92     __ DisableInstrumentation();    \
     93   }                                 \
     94   __ Trace(LOG_ALL, TRACE_DISABLE); \
     95   __ PopCalleeSavedRegisters();     \
     96   __ Ret();                         \
     97   masm.FinalizeCode()
     98 
     99 #define TRY_RUN(skipped)                                                \
    100   DISASSEMBLE();                                                        \
    101   simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \
    102   /* The simulator can run every test. */                               \
    103   *skipped = false
    104 
    105 #define TEARDOWN()
    106 
    107 #else  // VIXL_INCLUDE_SIMULATOR_AARCH64
    108 
    109 #define SETUP_WITH_FEATURES(...)                 \
    110   MacroAssembler masm;                           \
    111   masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
    112   CPU::SetUp()
    113 
    114 #define START() \
    115   masm.Reset(); \
    116   __ PushCalleeSavedRegisters()
    117 
    118 #define END()                   \
    119   __ PopCalleeSavedRegisters(); \
    120   __ Ret();                     \
    121   masm.FinalizeCode()
    122 
    123 #define TRY_RUN(skipped)                                                      \
    124   DISASSEMBLE();                                                              \
    125   /* If the test uses features that the current CPU doesn't support, don't */ \
    126   /* attempt to run it natively.                                           */ \
    127   {                                                                           \
    128     Decoder decoder;                                                          \
    129     /* TODO: Once available, use runtime feature detection. The use of  */    \
    130     /* AArch64LegacyBaseline is a stopgap.                              */    \
    131     const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    132     CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    133     CodeBuffer* buffer = masm.GetBuffer();                                    \
    134     decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
    135                    buffer->GetEndAddress<Instruction*>());                    \
    136     const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    137     if (this_machine.Has(requirements)) {                                     \
    138       masm.GetBuffer()->SetExecutable();                                      \
    139       ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
    140                     masm.GetSizeOfCodeGenerated());                           \
    141       masm.GetBuffer()->SetWritable();                                        \
    142       *skipped = false;                                                       \
    143     } else {                                                                  \
    144       std::stringstream os;                                                   \
    145       os << "Warning: skipping test due to missing CPU features.\n";          \
    146       os << "  Missing: {" << requirements.Without(this_machine) << "}\n";    \
    147       printf("%s", os.str().c_str());                                         \
    148       *skipped = true;                                                        \
    149     }                                                                         \
    150   }
    151 
    152 #define TEARDOWN()
    153 
    154 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
    155 
    156 
    157 #define DISASSEMBLE()                                             \
    158   if (Test::disassemble()) {                                      \
    159     PrintDisassembler disasm(stdout);                             \
    160     CodeBuffer* buffer = masm.GetBuffer();                        \
    161     Instruction* start = buffer->GetStartAddress<Instruction*>(); \
    162     Instruction* end = buffer->GetEndAddress<Instruction*>();     \
    163     disasm.DisassembleBuffer(start, end);                         \
    164   }
    165 
    166 // The maximum number of errors to report in detail for each test.
    167 static const unsigned kErrorReportLimit = 8;
    168 
    169 
    170 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
    171 // templated test functions.
    172 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
    173 
    174 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
    175 
    176 // The rawbits_to_fp functions are only used for printing decimal values so we
    177 // just approximate FP16 as double.
    178 static double rawbits_to_fp(uint16_t bits) {
    179   return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
    180 }
    181 
    182 
    183 // MacroAssembler member function pointers to pass to the test dispatchers.
    184 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
    185                                                   const FPRegister& fn);
    186 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
    187                                                   const FPRegister& fn,
    188                                                   const FPRegister& fm);
    189 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
    190                                                   const FPRegister& fn,
    191                                                   const FPRegister& fm,
    192                                                   const FPRegister& fa);
    193 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
    194                                                   const FPRegister& fm);
    195 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
    196                                                       double value);
    197 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
    198                                                     const FPRegister& fn);
    199 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
    200                                                       const FPRegister& fn,
    201                                                       int fbits);
    202 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
    203                                                       const Register& rn,
    204                                                       int fbits);
    205 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
    206 //       consolidated into one routine.
    207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
    208                                                     const VRegister& vn);
    209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
    210                                                     const VRegister& vn,
    211                                                     const VRegister& vm);
    212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
    213                                                           const VRegister& vn,
    214                                                           const VRegister& vm,
    215                                                           int vm_index);
    216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    217     const VRegister& vd, int imm1, const VRegister& vn, int imm2);
    218 
    219 // This helps using the same typename for both the function pointer
    220 // and the array of immediates passed to helper routines.
    221 template <typename T>
    222 class Test2OpImmediateNEONHelper_t {
    223  public:
    224   typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
    225                                            const VRegister& vn,
    226                                            T imm);
    227 };
    228 
    229 
    230 // Maximum number of hex characters required to represent values of either
    231 // templated type.
    232 template <typename Ta, typename Tb>
    233 static unsigned MaxHexCharCount() {
    234   unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb)));
    235   return (count * 8) / 4;
    236 }
    237 
    238 
    239 // Standard test dispatchers.
    240 
    241 
    242 static void Test1Op_Helper(Test1OpFPHelper_t helper,
    243                            uintptr_t inputs,
    244                            unsigned inputs_length,
    245                            uintptr_t results,
    246                            unsigned d_size,
    247                            unsigned n_size,
    248                            bool* skipped) {
    249   VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
    250               (d_size == kHRegSize));
    251   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
    252               (n_size == kHRegSize));
    253 
    254   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
    255   START();
    256 
    257   // Roll up the loop to keep the code size down.
    258   Label loop_n;
    259 
    260   Register out = x0;
    261   Register inputs_base = x1;
    262   Register length = w2;
    263   Register index_n = w3;
    264 
    265   int n_index_shift;
    266   FPRegister fd;
    267   FPRegister fn;
    268   if (n_size == kDRegSize) {
    269     n_index_shift = kDRegSizeInBytesLog2;
    270     fn = d1;
    271   } else if (n_size == kSRegSize) {
    272     n_index_shift = kSRegSizeInBytesLog2;
    273     fn = s1;
    274   } else {
    275     n_index_shift = kHRegSizeInBytesLog2;
    276     fn = h1;
    277   }
    278 
    279   if (d_size == kDRegSize) {
    280     fd = d0;
    281   } else if (d_size == kSRegSize) {
    282     fd = s0;
    283   } else {
    284     fd = h0;
    285   }
    286 
    287 
    288   __ Mov(out, results);
    289   __ Mov(inputs_base, inputs);
    290   __ Mov(length, inputs_length);
    291 
    292   __ Mov(index_n, 0);
    293   __ Bind(&loop_n);
    294   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
    295 
    296   {
    297     SingleEmissionCheckScope guard(&masm);
    298     (masm.*helper)(fd, fn);
    299   }
    300   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
    301 
    302   __ Add(index_n, index_n, 1);
    303   __ Cmp(index_n, inputs_length);
    304   __ B(lo, &loop_n);
    305 
    306   END();
    307   TRY_RUN(skipped);
    308   TEARDOWN();
    309 }
    310 
    311 
    312 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    313 // rawbits representations of doubles or floats. This ensures that exact bit
    314 // comparisons can be performed.
    315 template <typename Tn, typename Td>
    316 static void Test1Op(const char* name,
    317                     Test1OpFPHelper_t helper,
    318                     const Tn inputs[],
    319                     unsigned inputs_length,
    320                     const Td expected[],
    321                     unsigned expected_length) {
    322   VIXL_ASSERT(inputs_length > 0);
    323 
    324   const unsigned results_length = inputs_length;
    325   Td* results = new Td[results_length];
    326 
    327   const unsigned d_bits = sizeof(Td) * 8;
    328   const unsigned n_bits = sizeof(Tn) * 8;
    329   bool skipped;
    330 
    331   Test1Op_Helper(helper,
    332                  reinterpret_cast<uintptr_t>(inputs),
    333                  inputs_length,
    334                  reinterpret_cast<uintptr_t>(results),
    335                  d_bits,
    336                  n_bits,
    337                  &skipped);
    338 
    339   if (Test::generate_test_trace()) {
    340     // Print the results.
    341     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    342     for (unsigned d = 0; d < results_length; d++) {
    343       printf("  0x%0*" PRIx64 ",\n",
    344              d_bits / 4,
    345              static_cast<uint64_t>(results[d]));
    346     }
    347     printf("};\n");
    348     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    349   } else if (!skipped) {
    350     // Check the results.
    351     VIXL_CHECK(expected_length == results_length);
    352     unsigned error_count = 0;
    353     unsigned d = 0;
    354     for (unsigned n = 0; n < inputs_length; n++, d++) {
    355       if (results[d] != expected[d]) {
    356         if (++error_count > kErrorReportLimit) continue;
    357 
    358         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
    359                name,
    360                n_bits / 4,
    361                static_cast<uint64_t>(inputs[n]),
    362                name,
    363                rawbits_to_fp(inputs[n]));
    364         printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    365                d_bits / 4,
    366                static_cast<uint64_t>(expected[d]),
    367                rawbits_to_fp(expected[d]));
    368         printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    369                d_bits / 4,
    370                static_cast<uint64_t>(results[d]),
    371                rawbits_to_fp(results[d]));
    372         printf("\n");
    373       }
    374     }
    375     VIXL_ASSERT(d == expected_length);
    376     if (error_count > kErrorReportLimit) {
    377       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    378     }
    379     VIXL_CHECK(error_count == 0);
    380   }
    381   delete[] results;
    382 }
    383 
    384 
    385 static void Test2Op_Helper(Test2OpFPHelper_t helper,
    386                            uintptr_t inputs,
    387                            unsigned inputs_length,
    388                            uintptr_t results,
    389                            unsigned reg_size,
    390                            bool* skipped) {
    391   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
    392               (reg_size == kHRegSize));
    393 
    394   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
    395   START();
    396 
    397   // Roll up the loop to keep the code size down.
    398   Label loop_n, loop_m;
    399 
    400   Register out = x0;
    401   Register inputs_base = x1;
    402   Register length = w2;
    403   Register index_n = w3;
    404   Register index_m = w4;
    405 
    406   bool double_op = reg_size == kDRegSize;
    407   bool float_op = reg_size == kSRegSize;
    408   int index_shift;
    409   if (double_op) {
    410     index_shift = kDRegSizeInBytesLog2;
    411   } else if (float_op) {
    412     index_shift = kSRegSizeInBytesLog2;
    413   } else {
    414     index_shift = kHRegSizeInBytesLog2;
    415   }
    416 
    417   FPRegister fd;
    418   FPRegister fn;
    419   FPRegister fm;
    420 
    421   if (double_op) {
    422     fd = d0;
    423     fn = d1;
    424     fm = d2;
    425   } else if (float_op) {
    426     fd = s0;
    427     fn = s1;
    428     fm = s2;
    429   } else {
    430     fd = h0;
    431     fn = h1;
    432     fm = h2;
    433   }
    434 
    435   __ Mov(out, results);
    436   __ Mov(inputs_base, inputs);
    437   __ Mov(length, inputs_length);
    438 
    439   __ Mov(index_n, 0);
    440   __ Bind(&loop_n);
    441   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    442 
    443   __ Mov(index_m, 0);
    444   __ Bind(&loop_m);
    445   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    446 
    447   {
    448     SingleEmissionCheckScope guard(&masm);
    449     (masm.*helper)(fd, fn, fm);
    450   }
    451   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
    452 
    453   __ Add(index_m, index_m, 1);
    454   __ Cmp(index_m, inputs_length);
    455   __ B(lo, &loop_m);
    456 
    457   __ Add(index_n, index_n, 1);
    458   __ Cmp(index_n, inputs_length);
    459   __ B(lo, &loop_n);
    460 
    461   END();
    462   TRY_RUN(skipped);
    463   TEARDOWN();
    464 }
    465 
    466 
    467 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    468 // rawbits representations of doubles or floats. This ensures that exact bit
    469 // comparisons can be performed.
    470 template <typename T>
    471 static void Test2Op(const char* name,
    472                     Test2OpFPHelper_t helper,
    473                     const T inputs[],
    474                     unsigned inputs_length,
    475                     const T expected[],
    476                     unsigned expected_length) {
    477   VIXL_ASSERT(inputs_length > 0);
    478 
    479   const unsigned results_length = inputs_length * inputs_length;
    480   T* results = new T[results_length];
    481 
    482   const unsigned bits = sizeof(T) * 8;
    483   bool skipped;
    484 
    485   Test2Op_Helper(helper,
    486                  reinterpret_cast<uintptr_t>(inputs),
    487                  inputs_length,
    488                  reinterpret_cast<uintptr_t>(results),
    489                  bits,
    490                  &skipped);
    491 
    492   if (Test::generate_test_trace()) {
    493     // Print the results.
    494     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    495     for (unsigned d = 0; d < results_length; d++) {
    496       printf("  0x%0*" PRIx64 ",\n",
    497              bits / 4,
    498              static_cast<uint64_t>(results[d]));
    499     }
    500     printf("};\n");
    501     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    502   } else if (!skipped) {
    503     // Check the results.
    504     VIXL_CHECK(expected_length == results_length);
    505     unsigned error_count = 0;
    506     unsigned d = 0;
    507     for (unsigned n = 0; n < inputs_length; n++) {
    508       for (unsigned m = 0; m < inputs_length; m++, d++) {
    509         if (results[d] != expected[d]) {
    510           if (++error_count > kErrorReportLimit) continue;
    511 
    512           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
    513                  name,
    514                  bits / 4,
    515                  static_cast<uint64_t>(inputs[n]),
    516                  bits / 4,
    517                  static_cast<uint64_t>(inputs[m]),
    518                  name,
    519                  rawbits_to_fp(inputs[n]),
    520                  rawbits_to_fp(inputs[m]));
    521           printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    522                  bits / 4,
    523                  static_cast<uint64_t>(expected[d]),
    524                  rawbits_to_fp(expected[d]));
    525           printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    526                  bits / 4,
    527                  static_cast<uint64_t>(results[d]),
    528                  rawbits_to_fp(results[d]));
    529           printf("\n");
    530         }
    531       }
    532     }
    533     VIXL_ASSERT(d == expected_length);
    534     if (error_count > kErrorReportLimit) {
    535       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    536     }
    537     VIXL_CHECK(error_count == 0);
    538   }
    539   delete[] results;
    540 }
    541 
    542 
    543 static void Test3Op_Helper(Test3OpFPHelper_t helper,
    544                            uintptr_t inputs,
    545                            unsigned inputs_length,
    546                            uintptr_t results,
    547                            unsigned reg_size,
    548                            bool* skipped) {
    549   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
    550               (reg_size == kHRegSize));
    551 
    552   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
    553   START();
    554 
    555   // Roll up the loop to keep the code size down.
    556   Label loop_n, loop_m, loop_a;
    557 
    558   Register out = x0;
    559   Register inputs_base = x1;
    560   Register length = w2;
    561   Register index_n = w3;
    562   Register index_m = w4;
    563   Register index_a = w5;
    564 
    565   bool double_op = reg_size == kDRegSize;
    566   bool single_op = reg_size == kSRegSize;
    567   int index_shift;
    568   FPRegister fd(0, reg_size);
    569   FPRegister fn(1, reg_size);
    570   FPRegister fm(2, reg_size);
    571   FPRegister fa(3, reg_size);
    572   if (double_op) {
    573     index_shift = kDRegSizeInBytesLog2;
    574   } else if (single_op) {
    575     index_shift = kSRegSizeInBytesLog2;
    576   } else {
    577     index_shift = kHRegSizeInBytesLog2;
    578   }
    579 
    580   __ Mov(out, results);
    581   __ Mov(inputs_base, inputs);
    582   __ Mov(length, inputs_length);
    583 
    584   __ Mov(index_n, 0);
    585   __ Bind(&loop_n);
    586   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    587 
    588   __ Mov(index_m, 0);
    589   __ Bind(&loop_m);
    590   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    591 
    592   __ Mov(index_a, 0);
    593   __ Bind(&loop_a);
    594   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
    595 
    596   {
    597     SingleEmissionCheckScope guard(&masm);
    598     (masm.*helper)(fd, fn, fm, fa);
    599   }
    600   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
    601 
    602   __ Add(index_a, index_a, 1);
    603   __ Cmp(index_a, inputs_length);
    604   __ B(lo, &loop_a);
    605 
    606   __ Add(index_m, index_m, 1);
    607   __ Cmp(index_m, inputs_length);
    608   __ B(lo, &loop_m);
    609 
    610   __ Add(index_n, index_n, 1);
    611   __ Cmp(index_n, inputs_length);
    612   __ B(lo, &loop_n);
    613 
    614   END();
    615   TRY_RUN(skipped);
    616   TEARDOWN();
    617 }
    618 
    619 
    620 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    621 // rawbits representations of doubles or floats. This ensures that exact bit
    622 // comparisons can be performed.
    623 template <typename T>
    624 static void Test3Op(const char* name,
    625                     Test3OpFPHelper_t helper,
    626                     const T inputs[],
    627                     unsigned inputs_length,
    628                     const T expected[],
    629                     unsigned expected_length) {
    630   VIXL_ASSERT(inputs_length > 0);
    631 
    632   const unsigned results_length = inputs_length * inputs_length * inputs_length;
    633   T* results = new T[results_length];
    634 
    635   const unsigned bits = sizeof(T) * 8;
    636   bool skipped;
    637 
    638   Test3Op_Helper(helper,
    639                  reinterpret_cast<uintptr_t>(inputs),
    640                  inputs_length,
    641                  reinterpret_cast<uintptr_t>(results),
    642                  bits,
    643                  &skipped);
    644 
    645   if (Test::generate_test_trace()) {
    646     // Print the results.
    647     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    648     for (unsigned d = 0; d < results_length; d++) {
    649       printf("  0x%0*" PRIx64 ",\n",
    650              bits / 4,
    651              static_cast<uint64_t>(results[d]));
    652     }
    653     printf("};\n");
    654     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    655   } else if (!skipped) {
    656     // Check the results.
    657     VIXL_CHECK(expected_length == results_length);
    658     unsigned error_count = 0;
    659     unsigned d = 0;
    660     for (unsigned n = 0; n < inputs_length; n++) {
    661       for (unsigned m = 0; m < inputs_length; m++) {
    662         for (unsigned a = 0; a < inputs_length; a++, d++) {
    663           if (results[d] != expected[d]) {
    664             if (++error_count > kErrorReportLimit) continue;
    665 
    666             printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
    667                    " (%s %g %g %g):\n",
    668                    name,
    669                    bits / 4,
    670                    static_cast<uint64_t>(inputs[n]),
    671                    bits / 4,
    672                    static_cast<uint64_t>(inputs[m]),
    673                    bits / 4,
    674                    static_cast<uint64_t>(inputs[a]),
    675                    name,
    676                    rawbits_to_fp(inputs[n]),
    677                    rawbits_to_fp(inputs[m]),
    678                    rawbits_to_fp(inputs[a]));
    679             printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    680                    bits / 4,
    681                    static_cast<uint64_t>(expected[d]),
    682                    rawbits_to_fp(expected[d]));
    683             printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    684                    bits / 4,
    685                    static_cast<uint64_t>(results[d]),
    686                    rawbits_to_fp(results[d]));
    687             printf("\n");
    688           }
    689         }
    690       }
    691     }
    692     VIXL_ASSERT(d == expected_length);
    693     if (error_count > kErrorReportLimit) {
    694       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    695     }
    696     VIXL_CHECK(error_count == 0);
    697   }
    698   delete[] results;
    699 }
    700 
    701 
    702 static void TestCmp_Helper(TestFPCmpHelper_t helper,
    703                            uintptr_t inputs,
    704                            unsigned inputs_length,
    705                            uintptr_t results,
    706                            unsigned reg_size,
    707                            bool* skipped) {
    708   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    709 
    710   SETUP_WITH_FEATURES(CPUFeatures::kFP);
    711   START();
    712 
    713   // Roll up the loop to keep the code size down.
    714   Label loop_n, loop_m;
    715 
    716   Register out = x0;
    717   Register inputs_base = x1;
    718   Register length = w2;
    719   Register index_n = w3;
    720   Register index_m = w4;
    721   Register flags = x5;
    722 
    723   bool double_op = reg_size == kDRegSize;
    724   const int index_shift =
    725       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    726 
    727   FPRegister fn = double_op ? d1 : s1;
    728   FPRegister fm = double_op ? d2 : s2;
    729 
    730   __ Mov(out, results);
    731   __ Mov(inputs_base, inputs);
    732   __ Mov(length, inputs_length);
    733 
    734   __ Mov(index_n, 0);
    735   __ Bind(&loop_n);
    736   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    737 
    738   __ Mov(index_m, 0);
    739   __ Bind(&loop_m);
    740   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    741 
    742   {
    743     SingleEmissionCheckScope guard(&masm);
    744     (masm.*helper)(fn, fm);
    745   }
    746   __ Mrs(flags, NZCV);
    747   __ Ubfx(flags, flags, 28, 4);
    748   __ Strb(flags, MemOperand(out, 1, PostIndex));
    749 
    750   __ Add(index_m, index_m, 1);
    751   __ Cmp(index_m, inputs_length);
    752   __ B(lo, &loop_m);
    753 
    754   __ Add(index_n, index_n, 1);
    755   __ Cmp(index_n, inputs_length);
    756   __ B(lo, &loop_n);
    757 
    758   END();
    759   TRY_RUN(skipped);
    760   TEARDOWN();
    761 }
    762 
    763 
    764 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    765 // rawbits representations of doubles or floats. This ensures that exact bit
    766 // comparisons can be performed.
    767 template <typename T>
    768 static void TestCmp(const char* name,
    769                     TestFPCmpHelper_t helper,
    770                     const T inputs[],
    771                     unsigned inputs_length,
    772                     const uint8_t expected[],
    773                     unsigned expected_length) {
    774   VIXL_ASSERT(inputs_length > 0);
    775 
    776   const unsigned results_length = inputs_length * inputs_length;
    777   uint8_t* results = new uint8_t[results_length];
    778 
    779   const unsigned bits = sizeof(T) * 8;
    780   bool skipped;
    781 
    782   TestCmp_Helper(helper,
    783                  reinterpret_cast<uintptr_t>(inputs),
    784                  inputs_length,
    785                  reinterpret_cast<uintptr_t>(results),
    786                  bits,
    787                  &skipped);
    788 
    789   if (Test::generate_test_trace()) {
    790     // Print the results.
    791     printf("const uint8_t kExpected_%s[] = {\n", name);
    792     for (unsigned d = 0; d < results_length; d++) {
    793       // Each NZCV result only requires 4 bits.
    794       VIXL_ASSERT((results[d] & 0xf) == results[d]);
    795       printf("  0x%" PRIx8 ",\n", results[d]);
    796     }
    797     printf("};\n");
    798     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    799   } else if (!skipped) {
    800     // Check the results.
    801     VIXL_CHECK(expected_length == results_length);
    802     unsigned error_count = 0;
    803     unsigned d = 0;
    804     for (unsigned n = 0; n < inputs_length; n++) {
    805       for (unsigned m = 0; m < inputs_length; m++, d++) {
    806         if (results[d] != expected[d]) {
    807           if (++error_count > kErrorReportLimit) continue;
    808 
    809           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
    810                  name,
    811                  bits / 4,
    812                  static_cast<uint64_t>(inputs[n]),
    813                  bits / 4,
    814                  static_cast<uint64_t>(inputs[m]),
    815                  name,
    816                  rawbits_to_fp(inputs[n]),
    817                  rawbits_to_fp(inputs[m]));
    818           printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
    819                  (expected[d] & 0x8) ? 'N' : 'n',
    820                  (expected[d] & 0x4) ? 'Z' : 'z',
    821                  (expected[d] & 0x2) ? 'C' : 'c',
    822                  (expected[d] & 0x1) ? 'V' : 'v',
    823                  expected[d]);
    824           printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
    825                  (results[d] & 0x8) ? 'N' : 'n',
    826                  (results[d] & 0x4) ? 'Z' : 'z',
    827                  (results[d] & 0x2) ? 'C' : 'c',
    828                  (results[d] & 0x1) ? 'V' : 'v',
    829                  results[d]);
    830           printf("\n");
    831         }
    832       }
    833     }
    834     VIXL_ASSERT(d == expected_length);
    835     if (error_count > kErrorReportLimit) {
    836       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    837     }
    838     VIXL_CHECK(error_count == 0);
    839   }
    840   delete[] results;
    841 }
    842 
    843 
    844 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
    845                                uintptr_t inputs,
    846                                unsigned inputs_length,
    847                                uintptr_t results,
    848                                unsigned reg_size,
    849                                bool* skipped) {
    850   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    851 
    852   SETUP_WITH_FEATURES(CPUFeatures::kFP);
    853   START();
    854 
    855   // Roll up the loop to keep the code size down.
    856   Label loop_n, loop_m;
    857 
    858   Register out = x0;
    859   Register inputs_base = x1;
    860   Register length = w2;
    861   Register index_n = w3;
    862   Register flags = x4;
    863 
    864   bool double_op = reg_size == kDRegSize;
    865   const int index_shift =
    866       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    867 
    868   FPRegister fn = double_op ? d1 : s1;
    869 
    870   __ Mov(out, results);
    871   __ Mov(inputs_base, inputs);
    872   __ Mov(length, inputs_length);
    873 
    874   __ Mov(index_n, 0);
    875   __ Bind(&loop_n);
    876   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    877 
    878   {
    879     SingleEmissionCheckScope guard(&masm);
    880     (masm.*helper)(fn, 0.0);
    881   }
    882   __ Mrs(flags, NZCV);
    883   __ Ubfx(flags, flags, 28, 4);
    884   __ Strb(flags, MemOperand(out, 1, PostIndex));
    885 
    886   __ Add(index_n, index_n, 1);
    887   __ Cmp(index_n, inputs_length);
    888   __ B(lo, &loop_n);
    889 
    890   END();
    891   TRY_RUN(skipped);
    892   TEARDOWN();
    893 }
    894 
    895 
    896 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    897 // rawbits representations of doubles or floats. This ensures that exact bit
    898 // comparisons can be performed.
    899 template <typename T>
    900 static void TestCmpZero(const char* name,
    901                         TestFPCmpZeroHelper_t helper,
    902                         const T inputs[],
    903                         unsigned inputs_length,
    904                         const uint8_t expected[],
    905                         unsigned expected_length) {
    906   VIXL_ASSERT(inputs_length > 0);
    907 
    908   const unsigned results_length = inputs_length;
    909   uint8_t* results = new uint8_t[results_length];
    910 
    911   const unsigned bits = sizeof(T) * 8;
    912   bool skipped;
    913 
    914   TestCmpZero_Helper(helper,
    915                      reinterpret_cast<uintptr_t>(inputs),
    916                      inputs_length,
    917                      reinterpret_cast<uintptr_t>(results),
    918                      bits,
    919                      &skipped);
    920 
    921   if (Test::generate_test_trace()) {
    922     // Print the results.
    923     printf("const uint8_t kExpected_%s[] = {\n", name);
    924     for (unsigned d = 0; d < results_length; d++) {
    925       // Each NZCV result only requires 4 bits.
    926       VIXL_ASSERT((results[d] & 0xf) == results[d]);
    927       printf("  0x%" PRIx8 ",\n", results[d]);
    928     }
    929     printf("};\n");
    930     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    931   } else if (!skipped) {
    932     // Check the results.
    933     VIXL_CHECK(expected_length == results_length);
    934     unsigned error_count = 0;
    935     unsigned d = 0;
    936     for (unsigned n = 0; n < inputs_length; n++, d++) {
    937       if (results[d] != expected[d]) {
    938         if (++error_count > kErrorReportLimit) continue;
    939 
    940         printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
    941                name,
    942                bits / 4,
    943                static_cast<uint64_t>(inputs[n]),
    944                bits / 4,
    945                0,
    946                name,
    947                rawbits_to_fp(inputs[n]));
    948         printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
    949                (expected[d] & 0x8) ? 'N' : 'n',
    950                (expected[d] & 0x4) ? 'Z' : 'z',
    951                (expected[d] & 0x2) ? 'C' : 'c',
    952                (expected[d] & 0x1) ? 'V' : 'v',
    953                expected[d]);
    954         printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
    955                (results[d] & 0x8) ? 'N' : 'n',
    956                (results[d] & 0x4) ? 'Z' : 'z',
    957                (results[d] & 0x2) ? 'C' : 'c',
    958                (results[d] & 0x1) ? 'V' : 'v',
    959                results[d]);
    960         printf("\n");
    961       }
    962     }
    963     VIXL_ASSERT(d == expected_length);
    964     if (error_count > kErrorReportLimit) {
    965       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    966     }
    967     VIXL_CHECK(error_count == 0);
    968   }
    969   delete[] results;
    970 }
    971 
    972 
    973 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
    974                                  uintptr_t inputs,
    975                                  unsigned inputs_length,
    976                                  uintptr_t results,
    977                                  unsigned d_size,
    978                                  unsigned n_size,
    979                                  bool* skipped) {
    980   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
    981   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
    982               (n_size == kHRegSize));
    983 
    984   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
    985   START();
    986 
    987   // Roll up the loop to keep the code size down.
    988   Label loop_n;
    989 
    990   Register out = x0;
    991   Register inputs_base = x1;
    992   Register length = w2;
    993   Register index_n = w3;
    994 
    995   int n_index_shift;
    996   if (n_size == kDRegSize) {
    997     n_index_shift = kDRegSizeInBytesLog2;
    998   } else if (n_size == kSRegSize) {
    999     n_index_shift = kSRegSizeInBytesLog2;
   1000   } else {
   1001     n_index_shift = kHRegSizeInBytesLog2;
   1002   }
   1003 
   1004   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
   1005   FPRegister fn;
   1006   if (n_size == kDRegSize) {
   1007     fn = d1;
   1008   } else if (n_size == kSRegSize) {
   1009     fn = s1;
   1010   } else {
   1011     fn = h1;
   1012   }
   1013 
   1014   __ Mov(out, results);
   1015   __ Mov(inputs_base, inputs);
   1016   __ Mov(length, inputs_length);
   1017 
   1018   __ Mov(index_n, 0);
   1019   __ Bind(&loop_n);
   1020   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
   1021 
   1022   for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
   1023     {
   1024       SingleEmissionCheckScope guard(&masm);
   1025       (masm.*helper)(rd, fn, fbits);
   1026     }
   1027     __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
   1028   }
   1029 
   1030   __ Add(index_n, index_n, 1);
   1031   __ Cmp(index_n, inputs_length);
   1032   __ B(lo, &loop_n);
   1033 
   1034   END();
   1035   TRY_RUN(skipped);
   1036   TEARDOWN();
   1037 }
   1038 
   1039 
   1040 static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
   1041                                uintptr_t inputs,
   1042                                unsigned inputs_length,
   1043                                uintptr_t results,
   1044                                unsigned d_size,
   1045                                unsigned n_size,
   1046                                bool* skipped) {
   1047   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
   1048   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
   1049               (n_size == kHRegSize));
   1050 
   1051   SETUP_WITH_FEATURES(CPUFeatures::kFP,
   1052                       CPUFeatures::kFPHalf,
   1053                       CPUFeatures::kJSCVT);
   1054   START();
   1055 
   1056   // Roll up the loop to keep the code size down.
   1057   Label loop_n;
   1058 
   1059   Register out = x0;
   1060   Register inputs_base = x1;
   1061   Register length = w2;
   1062   Register index_n = w3;
   1063 
   1064   int n_index_shift;
   1065   if (n_size == kDRegSize) {
   1066     n_index_shift = kDRegSizeInBytesLog2;
   1067   } else if (n_size == kSRegSize) {
   1068     n_index_shift = kSRegSizeInBytesLog2;
   1069   } else {
   1070     n_index_shift = kHRegSizeInBytesLog2;
   1071   }
   1072 
   1073   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
   1074   FPRegister fn;
   1075   if (n_size == kDRegSize) {
   1076     fn = d1;
   1077   } else if (n_size == kSRegSize) {
   1078     fn = s1;
   1079   } else {
   1080     fn = h1;
   1081   }
   1082 
   1083   __ Mov(out, results);
   1084   __ Mov(inputs_base, inputs);
   1085   __ Mov(length, inputs_length);
   1086 
   1087   __ Mov(index_n, 0);
   1088   __ Bind(&loop_n);
   1089   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
   1090 
   1091   {
   1092     SingleEmissionCheckScope guard(&masm);
   1093     (masm.*helper)(rd, fn);
   1094   }
   1095   __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
   1096 
   1097   __ Add(index_n, index_n, 1);
   1098   __ Cmp(index_n, inputs_length);
   1099   __ B(lo, &loop_n);
   1100 
   1101   END();
   1102   TRY_RUN(skipped);
   1103   TEARDOWN();
   1104 }
   1105 
   1106 
   1107 // Test FP instructions.
   1108 //  - The inputs[] array should be an array of rawbits representations of
   1109 //    doubles or floats. This ensures that exact bit comparisons can be
   1110 //    performed.
   1111 //  - The expected[] array should be an array of signed integers.
   1112 template <typename Tn, typename Td>
   1113 static void TestFPToS(const char* name,
   1114                       TestFPToIntHelper_t helper,
   1115                       const Tn inputs[],
   1116                       unsigned inputs_length,
   1117                       const Td expected[],
   1118                       unsigned expected_length) {
   1119   VIXL_ASSERT(inputs_length > 0);
   1120 
   1121   const unsigned results_length = inputs_length;
   1122   Td* results = new Td[results_length];
   1123 
   1124   const unsigned d_bits = sizeof(Td) * 8;
   1125   const unsigned n_bits = sizeof(Tn) * 8;
   1126   bool skipped;
   1127 
   1128   TestFPToInt_Helper(helper,
   1129                      reinterpret_cast<uintptr_t>(inputs),
   1130                      inputs_length,
   1131                      reinterpret_cast<uintptr_t>(results),
   1132                      d_bits,
   1133                      n_bits,
   1134                      &skipped);
   1135 
   1136   if (Test::generate_test_trace()) {
   1137     // Print the results.
   1138     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
   1139     // There is no simple C++ literal for INT*_MIN that doesn't produce
   1140     // warnings, so we use an appropriate constant in that case instead.
   1141     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
   1142     // the like) avoids warnings about comparing values with differing ranges.
   1143     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
   1144     const int64_t int_d_min = -(int_d_max)-1;
   1145     for (unsigned d = 0; d < results_length; d++) {
   1146       if (results[d] == int_d_min) {
   1147         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
   1148       } else {
   1149         // Some constants (such as those between INT32_MAX and UINT32_MAX)
   1150         // trigger compiler warnings. To avoid these warnings, use an
   1151         // appropriate macro to make the type explicit.
   1152         int64_t result_int64 = static_cast<int64_t>(results[d]);
   1153         if (result_int64 >= 0) {
   1154           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
   1155         } else {
   1156           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
   1157         }
   1158       }
   1159     }
   1160     printf("};\n");
   1161     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1162   } else if (!skipped) {
   1163     // Check the results.
   1164     VIXL_CHECK(expected_length == results_length);
   1165     unsigned error_count = 0;
   1166     unsigned d = 0;
   1167     for (unsigned n = 0; n < inputs_length; n++, d++) {
   1168       if (results[d] != expected[d]) {
   1169         if (++error_count > kErrorReportLimit) continue;
   1170 
   1171         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
   1172                name,
   1173                n_bits / 4,
   1174                static_cast<uint64_t>(inputs[n]),
   1175                name,
   1176                rawbits_to_fp(inputs[n]));
   1177         printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
   1178                d_bits / 4,
   1179                static_cast<uint64_t>(expected[d]),
   1180                static_cast<int64_t>(expected[d]));
   1181         printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
   1182                d_bits / 4,
   1183                static_cast<uint64_t>(results[d]),
   1184                static_cast<int64_t>(results[d]));
   1185         printf("\n");
   1186       }
   1187     }
   1188     VIXL_ASSERT(d == expected_length);
   1189     if (error_count > kErrorReportLimit) {
   1190       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1191     }
   1192     VIXL_CHECK(error_count == 0);
   1193   }
   1194   delete[] results;
   1195 }
   1196 
   1197 
   1198 // Test FP instructions.
   1199 //  - The inputs[] array should be an array of rawbits representations of
   1200 //    doubles or floats. This ensures that exact bit comparisons can be
   1201 //    performed.
   1202 //  - The expected[] array should be an array of unsigned integers.
   1203 template <typename Tn, typename Td>
   1204 static void TestFPToU(const char* name,
   1205                       TestFPToIntHelper_t helper,
   1206                       const Tn inputs[],
   1207                       unsigned inputs_length,
   1208                       const Td expected[],
   1209                       unsigned expected_length) {
   1210   VIXL_ASSERT(inputs_length > 0);
   1211 
   1212   const unsigned results_length = inputs_length;
   1213   Td* results = new Td[results_length];
   1214 
   1215   const unsigned d_bits = sizeof(Td) * 8;
   1216   const unsigned n_bits = sizeof(Tn) * 8;
   1217   bool skipped;
   1218 
   1219   TestFPToInt_Helper(helper,
   1220                      reinterpret_cast<uintptr_t>(inputs),
   1221                      inputs_length,
   1222                      reinterpret_cast<uintptr_t>(results),
   1223                      d_bits,
   1224                      n_bits,
   1225                      &skipped);
   1226 
   1227   if (Test::generate_test_trace()) {
   1228     // Print the results.
   1229     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
   1230     for (unsigned d = 0; d < results_length; d++) {
   1231       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
   1232     }
   1233     printf("};\n");
   1234     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1235   } else if (!skipped) {
   1236     // Check the results.
   1237     VIXL_CHECK(expected_length == results_length);
   1238     unsigned error_count = 0;
   1239     unsigned d = 0;
   1240     for (unsigned n = 0; n < inputs_length; n++, d++) {
   1241       if (results[d] != expected[d]) {
   1242         if (++error_count > kErrorReportLimit) continue;
   1243 
   1244         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
   1245                name,
   1246                n_bits / 4,
   1247                static_cast<uint64_t>(inputs[n]),
   1248                name,
   1249                rawbits_to_fp(inputs[n]));
   1250         printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1251                d_bits / 4,
   1252                static_cast<uint64_t>(expected[d]),
   1253                static_cast<uint64_t>(expected[d]));
   1254         printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1255                d_bits / 4,
   1256                static_cast<uint64_t>(results[d]),
   1257                static_cast<uint64_t>(results[d]));
   1258         printf("\n");
   1259       }
   1260     }
   1261     VIXL_ASSERT(d == expected_length);
   1262     if (error_count > kErrorReportLimit) {
   1263       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1264     }
   1265     VIXL_CHECK(error_count == 0);
   1266   }
   1267   delete[] results;
   1268 }
   1269 
   1270 
   1271 // Test FP instructions.
   1272 //  - The inputs[] array should be an array of rawbits representations of
   1273 //    doubles or floats. This ensures that exact bit comparisons can be
   1274 //    performed.
   1275 //  - The expected[] array should be an array of signed integers.
   1276 template <typename Tn, typename Td>
   1277 static void TestFPToFixedS(const char* name,
   1278                            TestFPToFixedHelper_t helper,
   1279                            const Tn inputs[],
   1280                            unsigned inputs_length,
   1281                            const Td expected[],
   1282                            unsigned expected_length) {
   1283   VIXL_ASSERT(inputs_length > 0);
   1284 
   1285   const unsigned d_bits = sizeof(Td) * 8;
   1286   const unsigned n_bits = sizeof(Tn) * 8;
   1287 
   1288   const unsigned results_length = inputs_length * (d_bits + 1);
   1289   Td* results = new Td[results_length];
   1290 
   1291   bool skipped;
   1292 
   1293   TestFPToFixed_Helper(helper,
   1294                        reinterpret_cast<uintptr_t>(inputs),
   1295                        inputs_length,
   1296                        reinterpret_cast<uintptr_t>(results),
   1297                        d_bits,
   1298                        n_bits,
   1299                        &skipped);
   1300 
   1301   if (Test::generate_test_trace()) {
   1302     // Print the results.
   1303     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
   1304     // There is no simple C++ literal for INT*_MIN that doesn't produce
   1305     // warnings, so we use an appropriate constant in that case instead.
   1306     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
   1307     // the like) avoids warnings about comparing values with differing ranges.
   1308     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
   1309     const int64_t int_d_min = -(int_d_max)-1;
   1310     for (unsigned d = 0; d < results_length; d++) {
   1311       if (results[d] == int_d_min) {
   1312         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
   1313       } else {
   1314         // Some constants (such as those between INT32_MAX and UINT32_MAX)
   1315         // trigger compiler warnings. To avoid these warnings, use an
   1316         // appropriate macro to make the type explicit.
   1317         int64_t result_int64 = static_cast<int64_t>(results[d]);
   1318         if (result_int64 >= 0) {
   1319           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
   1320         } else {
   1321           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
   1322         }
   1323       }
   1324     }
   1325     printf("};\n");
   1326     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1327   } else if (!skipped) {
   1328     // Check the results.
   1329     VIXL_CHECK(expected_length == results_length);
   1330     unsigned error_count = 0;
   1331     unsigned d = 0;
   1332     for (unsigned n = 0; n < inputs_length; n++) {
   1333       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
   1334         if (results[d] != expected[d]) {
   1335           if (++error_count > kErrorReportLimit) continue;
   1336 
   1337           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
   1338                  name,
   1339                  n_bits / 4,
   1340                  static_cast<uint64_t>(inputs[n]),
   1341                  fbits,
   1342                  name,
   1343                  rawbits_to_fp(inputs[n]),
   1344                  fbits);
   1345           printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
   1346                  d_bits / 4,
   1347                  static_cast<uint64_t>(expected[d]),
   1348                  static_cast<int64_t>(expected[d]));
   1349           printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
   1350                  d_bits / 4,
   1351                  static_cast<uint64_t>(results[d]),
   1352                  static_cast<int64_t>(results[d]));
   1353           printf("\n");
   1354         }
   1355       }
   1356     }
   1357     VIXL_ASSERT(d == expected_length);
   1358     if (error_count > kErrorReportLimit) {
   1359       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1360     }
   1361     VIXL_CHECK(error_count == 0);
   1362   }
   1363   delete[] results;
   1364 }
   1365 
   1366 
   1367 // Test FP instructions.
   1368 //  - The inputs[] array should be an array of rawbits representations of
   1369 //    doubles or floats. This ensures that exact bit comparisons can be
   1370 //    performed.
   1371 //  - The expected[] array should be an array of unsigned integers.
   1372 template <typename Tn, typename Td>
   1373 static void TestFPToFixedU(const char* name,
   1374                            TestFPToFixedHelper_t helper,
   1375                            const Tn inputs[],
   1376                            unsigned inputs_length,
   1377                            const Td expected[],
   1378                            unsigned expected_length) {
   1379   VIXL_ASSERT(inputs_length > 0);
   1380 
   1381   const unsigned d_bits = sizeof(Td) * 8;
   1382   const unsigned n_bits = sizeof(Tn) * 8;
   1383 
   1384   const unsigned results_length = inputs_length * (d_bits + 1);
   1385   Td* results = new Td[results_length];
   1386 
   1387   bool skipped;
   1388 
   1389   TestFPToFixed_Helper(helper,
   1390                        reinterpret_cast<uintptr_t>(inputs),
   1391                        inputs_length,
   1392                        reinterpret_cast<uintptr_t>(results),
   1393                        d_bits,
   1394                        n_bits,
   1395                        &skipped);
   1396 
   1397   if (Test::generate_test_trace()) {
   1398     // Print the results.
   1399     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
   1400     for (unsigned d = 0; d < results_length; d++) {
   1401       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
   1402     }
   1403     printf("};\n");
   1404     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1405   } else if (!skipped) {
   1406     // Check the results.
   1407     VIXL_CHECK(expected_length == results_length);
   1408     unsigned error_count = 0;
   1409     unsigned d = 0;
   1410     for (unsigned n = 0; n < inputs_length; n++) {
   1411       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
   1412         if (results[d] != expected[d]) {
   1413           if (++error_count > kErrorReportLimit) continue;
   1414 
   1415           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
   1416                  name,
   1417                  n_bits / 4,
   1418                  static_cast<uint64_t>(inputs[n]),
   1419                  fbits,
   1420                  name,
   1421                  rawbits_to_fp(inputs[n]),
   1422                  fbits);
   1423           printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1424                  d_bits / 4,
   1425                  static_cast<uint64_t>(expected[d]),
   1426                  static_cast<uint64_t>(expected[d]));
   1427           printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1428                  d_bits / 4,
   1429                  static_cast<uint64_t>(results[d]),
   1430                  static_cast<uint64_t>(results[d]));
   1431           printf("\n");
   1432         }
   1433       }
   1434     }
   1435     VIXL_ASSERT(d == expected_length);
   1436     if (error_count > kErrorReportLimit) {
   1437       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1438     }
   1439     VIXL_CHECK(error_count == 0);
   1440   }
   1441   delete[] results;
   1442 }
   1443 
   1444 
   1445 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
   1446 
   1447 
   1448 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
   1449                                uintptr_t inputs_n,
   1450                                unsigned inputs_n_length,
   1451                                uintptr_t results,
   1452                                VectorFormat vd_form,
   1453                                VectorFormat vn_form,
   1454                                bool* skipped) {
   1455   VIXL_ASSERT(vd_form != kFormatUndefined);
   1456   VIXL_ASSERT(vn_form != kFormatUndefined);
   1457 
   1458   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
   1459                       CPUFeatures::kFP,
   1460                       CPUFeatures::kRDM,
   1461                       CPUFeatures::kNEONHalf);
   1462   START();
   1463 
   1464   // Roll up the loop to keep the code size down.
   1465   Label loop_n;
   1466 
   1467   Register out = x0;
   1468   Register inputs_n_base = x1;
   1469   Register inputs_n_last_16bytes = x3;
   1470   Register index_n = x5;
   1471 
   1472   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1473   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1474   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1475 
   1476   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1477   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1478   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1479   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1480   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1481 
   1482 
   1483   // These will be either a D- or a Q-register form, with a single lane
   1484   // (for use in scalar load and store operations).
   1485   VRegister vd = VRegister(0, vd_bits);
   1486   VRegister vn = v1.V16B();
   1487   VRegister vntmp = v3.V16B();
   1488 
   1489   // These will have the correct format for use when calling 'helper'.
   1490   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
   1491   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1492 
   1493   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1494   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1495 
   1496   __ Mov(out, results);
   1497 
   1498   __ Mov(inputs_n_base, inputs_n);
   1499   __ Mov(inputs_n_last_16bytes,
   1500          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
   1501 
   1502   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   1503 
   1504   __ Mov(index_n, 0);
   1505   __ Bind(&loop_n);
   1506 
   1507   __ Ldr(vntmp_single,
   1508          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   1509   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   1510 
   1511   // Set the destination to zero.
   1512   // TODO: Setting the destination to values other than zero
   1513   //       might be a better test for instructions such as sqxtn2
   1514   //       which may leave parts of V registers unchanged.
   1515   __ Movi(vd.V16B(), 0);
   1516 
   1517   {
   1518     SingleEmissionCheckScope guard(&masm);
   1519     (masm.*helper)(vd_helper, vn_helper);
   1520   }
   1521   __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   1522 
   1523   __ Add(index_n, index_n, 1);
   1524   __ Cmp(index_n, inputs_n_length);
   1525   __ B(lo, &loop_n);
   1526 
   1527   END();
   1528   TRY_RUN(skipped);
   1529   TEARDOWN();
   1530 }
   1531 
   1532 
   1533 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1534 // arrays of rawbit representation of input values. This ensures that
   1535 // exact bit comparisons can be performed.
   1536 template <typename Td, typename Tn>
   1537 static void Test1OpNEON(const char* name,
   1538                         Test1OpNEONHelper_t helper,
   1539                         const Tn inputs_n[],
   1540                         unsigned inputs_n_length,
   1541                         const Td expected[],
   1542                         unsigned expected_length,
   1543                         VectorFormat vd_form,
   1544                         VectorFormat vn_form) {
   1545   VIXL_ASSERT(inputs_n_length > 0);
   1546 
   1547   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1548   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1549   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1550 
   1551   const unsigned results_length = inputs_n_length;
   1552   Td* results = new Td[results_length * vd_lane_count];
   1553   const unsigned lane_bit = sizeof(Td) * 8;
   1554   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   1555 
   1556   bool skipped;
   1557 
   1558   Test1OpNEON_Helper(helper,
   1559                      reinterpret_cast<uintptr_t>(inputs_n),
   1560                      inputs_n_length,
   1561                      reinterpret_cast<uintptr_t>(results),
   1562                      vd_form,
   1563                      vn_form,
   1564                      &skipped);
   1565 
   1566   if (Test::generate_test_trace()) {
   1567     // Print the results.
   1568     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1569     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1570       printf(" ");
   1571       // Output a separate result for each element of the result vector.
   1572       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1573         unsigned index = lane + (iteration * vd_lane_count);
   1574         printf(" 0x%0*" PRIx64 ",",
   1575                lane_len_in_hex,
   1576                static_cast<uint64_t>(results[index]));
   1577       }
   1578       printf("\n");
   1579     }
   1580 
   1581     printf("};\n");
   1582     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1583            name,
   1584            results_length);
   1585   } else if (!skipped) {
   1586     // Check the results.
   1587     VIXL_CHECK(expected_length == results_length);
   1588     unsigned error_count = 0;
   1589     unsigned d = 0;
   1590     const char* padding = "                    ";
   1591     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1592     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
   1593       bool error_in_vector = false;
   1594 
   1595       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1596         unsigned output_index = (n * vd_lane_count) + lane;
   1597 
   1598         if (results[output_index] != expected[output_index]) {
   1599           error_in_vector = true;
   1600           break;
   1601         }
   1602       }
   1603 
   1604       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1605         printf("%s\n", name);
   1606         printf(" Vn%.*s| Vd%.*s| Expected\n",
   1607                lane_len_in_hex + 1,
   1608                padding,
   1609                lane_len_in_hex + 1,
   1610                padding);
   1611 
   1612         const unsigned first_index_n =
   1613             inputs_n_length - (16 / vn_lane_bytes) + n + 1;
   1614 
   1615         for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
   1616              lane++) {
   1617           unsigned output_index = (n * vd_lane_count) + lane;
   1618           unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
   1619 
   1620           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
   1621                  " "
   1622                  "| 0x%0*" PRIx64 "\n",
   1623                  results[output_index] != expected[output_index] ? '*' : ' ',
   1624                  lane_len_in_hex,
   1625                  static_cast<uint64_t>(inputs_n[input_index_n]),
   1626                  lane_len_in_hex,
   1627                  static_cast<uint64_t>(results[output_index]),
   1628                  lane_len_in_hex,
   1629                  static_cast<uint64_t>(expected[output_index]));
   1630         }
   1631       }
   1632     }
   1633     VIXL_ASSERT(d == expected_length);
   1634     if (error_count > kErrorReportLimit) {
   1635       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1636     }
   1637     VIXL_CHECK(error_count == 0);
   1638   }
   1639   delete[] results;
   1640 }
   1641 
   1642 
   1643 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
   1644 //      where <V> is one of B, H, S or D registers.
   1645 //      e.g. saddlv H1, v0.8B
   1646 
   1647 // TODO: Change tests to store all lanes of the resulting V register.
   1648 //       Some tests store all 128 bits of the resulting V register to
   1649 //       check the simulator's behaviour on the rest of the register.
   1650 //       This is better than storing the affected lanes only.
   1651 //       Change any tests such as the 'Across' template to do the same.
   1652 
   1653 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
   1654                                      uintptr_t inputs_n,
   1655                                      unsigned inputs_n_length,
   1656                                      uintptr_t results,
   1657                                      VectorFormat vd_form,
   1658                                      VectorFormat vn_form,
   1659                                      bool* skipped) {
   1660   VIXL_ASSERT(vd_form != kFormatUndefined);
   1661   VIXL_ASSERT(vn_form != kFormatUndefined);
   1662 
   1663   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
   1664                       CPUFeatures::kFP,
   1665                       CPUFeatures::kNEONHalf);
   1666   START();
   1667 
   1668   // Roll up the loop to keep the code size down.
   1669   Label loop_n;
   1670 
   1671   Register out = x0;
   1672   Register inputs_n_base = x1;
   1673   Register inputs_n_last_vector = x3;
   1674   Register index_n = x5;
   1675 
   1676   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1677   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1678   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1679   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1680   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1681   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1682   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1683 
   1684   // Test destructive operations by (arbitrarily) using the same register for
   1685   // B and S lane sizes.
   1686   bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
   1687 
   1688   // Create two aliases for v0; the first is the destination for the tested
   1689   // instruction, the second, the whole Q register to check the results.
   1690   VRegister vd = VRegister(0, vd_bits);
   1691   VRegister vdstr = VRegister(0, kQRegSize);
   1692 
   1693   VRegister vn = VRegister(1, vn_bits);
   1694   VRegister vntmp = VRegister(3, vn_bits);
   1695 
   1696   // These will have the correct format for use when calling 'helper'.
   1697   VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
   1698   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1699 
   1700   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1701   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1702 
   1703   // Same registers for use in the 'ext' instructions.
   1704   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
   1705   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
   1706 
   1707   __ Mov(out, results);
   1708 
   1709   __ Mov(inputs_n_base, inputs_n);
   1710   __ Mov(inputs_n_last_vector,
   1711          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
   1712 
   1713   __ Ldr(vn, MemOperand(inputs_n_last_vector));
   1714 
   1715   __ Mov(index_n, 0);
   1716   __ Bind(&loop_n);
   1717 
   1718   __ Ldr(vntmp_single,
   1719          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   1720   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
   1721 
   1722   if (destructive) {
   1723     __ Mov(vd_helper, vn_helper);
   1724     SingleEmissionCheckScope guard(&masm);
   1725     (masm.*helper)(vd, vd_helper);
   1726   } else {
   1727     SingleEmissionCheckScope guard(&masm);
   1728     (masm.*helper)(vd, vn_helper);
   1729   }
   1730 
   1731   __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
   1732 
   1733   __ Add(index_n, index_n, 1);
   1734   __ Cmp(index_n, inputs_n_length);
   1735   __ B(lo, &loop_n);
   1736 
   1737   END();
   1738   TRY_RUN(skipped);
   1739   TEARDOWN();
   1740 }
   1741 
   1742 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1743 // arrays of rawbit representation of input values. This ensures that
   1744 // exact bit comparisons can be performed.
   1745 template <typename Td, typename Tn>
   1746 static void Test1OpAcrossNEON(const char* name,
   1747                               Test1OpNEONHelper_t helper,
   1748                               const Tn inputs_n[],
   1749                               unsigned inputs_n_length,
   1750                               const Td expected[],
   1751                               unsigned expected_length,
   1752                               VectorFormat vd_form,
   1753                               VectorFormat vn_form) {
   1754   VIXL_ASSERT(inputs_n_length > 0);
   1755 
   1756   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1757   const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
   1758 
   1759   const unsigned results_length = inputs_n_length;
   1760   Td* results = new Td[results_length * vd_lanes_per_q];
   1761   const unsigned lane_bit = sizeof(Td) * 8;
   1762   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   1763 
   1764   bool skipped;
   1765 
   1766   Test1OpAcrossNEON_Helper(helper,
   1767                            reinterpret_cast<uintptr_t>(inputs_n),
   1768                            inputs_n_length,
   1769                            reinterpret_cast<uintptr_t>(results),
   1770                            vd_form,
   1771                            vn_form,
   1772                            &skipped);
   1773 
   1774   if (Test::generate_test_trace()) {
   1775     // Print the results.
   1776     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1777     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1778       printf(" ");
   1779       // Output a separate result for each element of the result vector.
   1780       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1781         unsigned index = lane + (iteration * vd_lanes_per_q);
   1782         printf(" 0x%0*" PRIx64 ",",
   1783                lane_len_in_hex,
   1784                static_cast<uint64_t>(results[index]));
   1785       }
   1786       printf("\n");
   1787     }
   1788 
   1789     printf("};\n");
   1790     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1791            name,
   1792            results_length);
   1793   } else if (!skipped) {
   1794     // Check the results.
   1795     VIXL_CHECK(expected_length == results_length);
   1796     unsigned error_count = 0;
   1797     unsigned d = 0;
   1798     const char* padding = "                    ";
   1799     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1800     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
   1801       bool error_in_vector = false;
   1802 
   1803       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1804         unsigned expected_index = (n * vd_lane_count) + lane;
   1805         unsigned results_index = (n * vd_lanes_per_q) + lane;
   1806 
   1807         if (results[results_index] != expected[expected_index]) {
   1808           error_in_vector = true;
   1809           break;
   1810         }
   1811       }
   1812 
   1813       // For across operations, the remaining lanes should be zero.
   1814       for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
   1815         unsigned results_index = (n * vd_lanes_per_q) + lane;
   1816         if (results[results_index] != 0) {
   1817           error_in_vector = true;
   1818           break;
   1819         }
   1820       }
   1821 
   1822       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1823         const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1824 
   1825         printf("%s\n", name);
   1826         printf(" Vn%.*s| Vd%.*s| Expected\n",
   1827                lane_len_in_hex + 1,
   1828                padding,
   1829                lane_len_in_hex + 1,
   1830                padding);
   1831 
   1832         // TODO: In case of an error, all tests print out as many elements as
   1833         //       there are lanes in the output or input vectors. This way
   1834         //       the viewer can read all the values that were needed for the
   1835         //       operation but the output contains also unnecessary values.
   1836         //       These prints can be improved according to the arguments
   1837         //       passed to test functions.
   1838         //       This output for the 'Across' category has the required
   1839         //       modifications.
   1840         for (unsigned lane = 0; lane < vn_lane_count; lane++) {
   1841           unsigned results_index =
   1842               (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
   1843           unsigned input_index_n =
   1844               (inputs_n_length - vn_lane_count + n + 1 + lane) %
   1845               inputs_n_length;
   1846 
   1847           Td expect = 0;
   1848           if ((vn_lane_count - 1) == lane) {
   1849             // This is the last lane to be printed, ie. the least-significant
   1850             // lane, so use the expected value; any other lane should be zero.
   1851             unsigned expected_index = n * vd_lane_count;
   1852             expect = expected[expected_index];
   1853           }
   1854           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   1855                  results[results_index] != expect ? '*' : ' ',
   1856                  lane_len_in_hex,
   1857                  static_cast<uint64_t>(inputs_n[input_index_n]),
   1858                  lane_len_in_hex,
   1859                  static_cast<uint64_t>(results[results_index]),
   1860                  lane_len_in_hex,
   1861                  static_cast<uint64_t>(expect));
   1862         }
   1863       }
   1864     }
   1865     VIXL_ASSERT(d == expected_length);
   1866     if (error_count > kErrorReportLimit) {
   1867       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1868     }
   1869     VIXL_CHECK(error_count == 0);
   1870   }
   1871   delete[] results;
   1872 }
   1873 
   1874 
   1875 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
   1876 
   1877 // TODO: Iterate over inputs_d once the traces file is split.
   1878 
   1879 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
   1880                                uintptr_t inputs_d,
   1881                                uintptr_t inputs_n,
   1882                                unsigned inputs_n_length,
   1883                                uintptr_t inputs_m,
   1884                                unsigned inputs_m_length,
   1885                                uintptr_t results,
   1886                                VectorFormat vd_form,
   1887                                VectorFormat vn_form,
   1888                                VectorFormat vm_form,
   1889                                bool* skipped) {
   1890   VIXL_ASSERT(vd_form != kFormatUndefined);
   1891   VIXL_ASSERT(vn_form != kFormatUndefined);
   1892   VIXL_ASSERT(vm_form != kFormatUndefined);
   1893 
   1894   CPUFeatures features;
   1895   features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
   1896   features.Combine(CPUFeatures::kFP);
   1897   features.Combine(CPUFeatures::kRDM);
   1898   features.Combine(CPUFeatures::kDotProduct);
   1899   SETUP_WITH_FEATURES(features);
   1900   START();
   1901 
   1902   // Roll up the loop to keep the code size down.
   1903   Label loop_n, loop_m;
   1904 
   1905   Register out = x0;
   1906   Register inputs_n_base = x1;
   1907   Register inputs_m_base = x2;
   1908   Register inputs_d_base = x3;
   1909   Register inputs_n_last_16bytes = x4;
   1910   Register inputs_m_last_16bytes = x5;
   1911   Register index_n = x6;
   1912   Register index_m = x7;
   1913 
   1914   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1915   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1916   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1917 
   1918   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1919   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1920   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1921   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1922   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1923 
   1924   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
   1925   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
   1926   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
   1927   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
   1928   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
   1929 
   1930 
   1931   // Always load and store 128 bits regardless of the format.
   1932   VRegister vd = v0.V16B();
   1933   VRegister vn = v1.V16B();
   1934   VRegister vm = v2.V16B();
   1935   VRegister vntmp = v3.V16B();
   1936   VRegister vmtmp = v4.V16B();
   1937   VRegister vres = v5.V16B();
   1938 
   1939   // These will have the correct format for calling the 'helper'.
   1940   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1941   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
   1942   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   1943 
   1944   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1945   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1946   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
   1947 
   1948   __ Mov(out, results);
   1949 
   1950   __ Mov(inputs_d_base, inputs_d);
   1951 
   1952   __ Mov(inputs_n_base, inputs_n);
   1953   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
   1954   __ Mov(inputs_m_base, inputs_m);
   1955   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
   1956 
   1957   __ Ldr(vd, MemOperand(inputs_d_base));
   1958   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   1959   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
   1960 
   1961   __ Mov(index_n, 0);
   1962   __ Bind(&loop_n);
   1963 
   1964   __ Ldr(vntmp_single,
   1965          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   1966   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   1967 
   1968   __ Mov(index_m, 0);
   1969   __ Bind(&loop_m);
   1970 
   1971   __ Ldr(vmtmp_single,
   1972          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
   1973   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
   1974 
   1975   __ Mov(vres, vd);
   1976   {
   1977     SingleEmissionCheckScope guard(&masm);
   1978     (masm.*helper)(vres_helper, vn_helper, vm_helper);
   1979   }
   1980   __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   1981 
   1982   __ Add(index_m, index_m, 1);
   1983   __ Cmp(index_m, inputs_m_length);
   1984   __ B(lo, &loop_m);
   1985 
   1986   __ Add(index_n, index_n, 1);
   1987   __ Cmp(index_n, inputs_n_length);
   1988   __ B(lo, &loop_n);
   1989 
   1990   END();
   1991   TRY_RUN(skipped);
   1992   TEARDOWN();
   1993 }
   1994 
   1995 
   1996 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1997 // arrays of rawbit representation of input values. This ensures that
   1998 // exact bit comparisons can be performed.
   1999 template <typename Td, typename Tn, typename Tm>
   2000 static void Test2OpNEON(const char* name,
   2001                         Test2OpNEONHelper_t helper,
   2002                         const Td inputs_d[],
   2003                         const Tn inputs_n[],
   2004                         unsigned inputs_n_length,
   2005                         const Tm inputs_m[],
   2006                         unsigned inputs_m_length,
   2007                         const Td expected[],
   2008                         unsigned expected_length,
   2009                         VectorFormat vd_form,
   2010                         VectorFormat vn_form,
   2011                         VectorFormat vm_form) {
   2012   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
   2013 
   2014   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
   2015 
   2016   const unsigned results_length = inputs_n_length * inputs_m_length;
   2017   Td* results = new Td[results_length * vd_lane_count];
   2018   const unsigned lane_bit = sizeof(Td) * 8;
   2019   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
   2020 
   2021   bool skipped;
   2022 
   2023   Test2OpNEON_Helper(helper,
   2024                      reinterpret_cast<uintptr_t>(inputs_d),
   2025                      reinterpret_cast<uintptr_t>(inputs_n),
   2026                      inputs_n_length,
   2027                      reinterpret_cast<uintptr_t>(inputs_m),
   2028                      inputs_m_length,
   2029                      reinterpret_cast<uintptr_t>(results),
   2030                      vd_form,
   2031                      vn_form,
   2032                      vm_form,
   2033                      &skipped);
   2034 
   2035   if (Test::generate_test_trace()) {
   2036     // Print the results.
   2037     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2038     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2039       printf(" ");
   2040       // Output a separate result for each element of the result vector.
   2041       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2042         unsigned index = lane + (iteration * vd_lane_count);
   2043         printf(" 0x%0*" PRIx64 ",",
   2044                lane_len_in_hex,
   2045                static_cast<uint64_t>(results[index]));
   2046       }
   2047       printf("\n");
   2048     }
   2049 
   2050     printf("};\n");
   2051     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2052            name,
   2053            results_length);
   2054   } else if (!skipped) {
   2055     // Check the results.
   2056     VIXL_CHECK(expected_length == results_length);
   2057     unsigned error_count = 0;
   2058     unsigned d = 0;
   2059     const char* padding = "                    ";
   2060     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2061     for (unsigned n = 0; n < inputs_n_length; n++) {
   2062       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
   2063         bool error_in_vector = false;
   2064 
   2065         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2066           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   2067                                   (m * vd_lane_count) + lane;
   2068 
   2069           if (results[output_index] != expected[output_index]) {
   2070             error_in_vector = true;
   2071             break;
   2072           }
   2073         }
   2074 
   2075         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2076           printf("%s\n", name);
   2077           printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
   2078                  lane_len_in_hex + 1,
   2079                  padding,
   2080                  lane_len_in_hex + 1,
   2081                  padding,
   2082                  lane_len_in_hex + 1,
   2083                  padding,
   2084                  lane_len_in_hex + 1,
   2085                  padding);
   2086 
   2087           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2088             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   2089                                     (m * vd_lane_count) + lane;
   2090             unsigned input_index_n =
   2091                 (inputs_n_length - vd_lane_count + n + 1 + lane) %
   2092                 inputs_n_length;
   2093             unsigned input_index_m =
   2094                 (inputs_m_length - vd_lane_count + m + 1 + lane) %
   2095                 inputs_m_length;
   2096 
   2097             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
   2098                    " "
   2099                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2100                    results[output_index] != expected[output_index] ? '*' : ' ',
   2101                    lane_len_in_hex,
   2102                    static_cast<uint64_t>(inputs_d[lane]),
   2103                    lane_len_in_hex,
   2104                    static_cast<uint64_t>(inputs_n[input_index_n]),
   2105                    lane_len_in_hex,
   2106                    static_cast<uint64_t>(inputs_m[input_index_m]),
   2107                    lane_len_in_hex,
   2108                    static_cast<uint64_t>(results[output_index]),
   2109                    lane_len_in_hex,
   2110                    static_cast<uint64_t>(expected[output_index]));
   2111           }
   2112         }
   2113       }
   2114     }
   2115     VIXL_ASSERT(d == expected_length);
   2116     if (error_count > kErrorReportLimit) {
   2117       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2118     }
   2119     VIXL_CHECK(error_count == 0);
   2120   }
   2121   delete[] results;
   2122 }
   2123 
   2124 
   2125 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
   2126 
   2127 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
   2128                                      uintptr_t inputs_d,
   2129                                      uintptr_t inputs_n,
   2130                                      unsigned inputs_n_length,
   2131                                      uintptr_t inputs_m,
   2132                                      unsigned inputs_m_length,
   2133                                      const int indices[],
   2134                                      unsigned indices_length,
   2135                                      uintptr_t results,
   2136                                      VectorFormat vd_form,
   2137                                      VectorFormat vn_form,
   2138                                      VectorFormat vm_form,
   2139                                      unsigned vm_subvector_count,
   2140                                      bool* skipped) {
   2141   VIXL_ASSERT(vd_form != kFormatUndefined);
   2142   VIXL_ASSERT(vn_form != kFormatUndefined);
   2143   VIXL_ASSERT(vm_form != kFormatUndefined);
   2144   VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));
   2145 
   2146   CPUFeatures features;
   2147   features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
   2148   features.Combine(CPUFeatures::kFP);
   2149   features.Combine(CPUFeatures::kRDM);
   2150   features.Combine(CPUFeatures::kDotProduct);
   2151   SETUP_WITH_FEATURES(features);
   2152 
   2153   START();
   2154 
   2155   // Roll up the loop to keep the code size down.
   2156   Label loop_n, loop_m;
   2157 
   2158   Register out = x0;
   2159   Register inputs_n_base = x1;
   2160   Register inputs_m_base = x2;
   2161   Register inputs_d_base = x3;
   2162   Register inputs_n_last_16bytes = x4;
   2163   Register inputs_m_last_16bytes = x5;
   2164   Register index_n = x6;
   2165   Register index_m = x7;
   2166 
   2167   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   2168   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   2169   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2170 
   2171   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   2172   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2173   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2174   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   2175   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   2176 
   2177   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
   2178   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
   2179   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
   2180   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
   2181   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
   2182 
   2183   VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);
   2184 
   2185   // Always load and store 128 bits regardless of the format.
   2186   VRegister vd = v0.V16B();
   2187   VRegister vn = v1.V16B();
   2188   VRegister vm = v2.V16B();
   2189   VRegister vntmp = v3.V16B();
   2190   VRegister vmtmp = v4.V16B();
   2191   VRegister vres = v5.V16B();
   2192 
   2193   // These will have the correct format for calling the 'helper'.
   2194   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   2195   VRegister vm_helper =
   2196       VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
   2197   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   2198 
   2199   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   2200   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   2201   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
   2202 
   2203   __ Mov(out, results);
   2204 
   2205   __ Mov(inputs_d_base, inputs_d);
   2206 
   2207   __ Mov(inputs_n_base, inputs_n);
   2208   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
   2209   __ Mov(inputs_m_base, inputs_m);
   2210   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
   2211 
   2212   __ Ldr(vd, MemOperand(inputs_d_base));
   2213   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   2214   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
   2215 
   2216   __ Mov(index_n, 0);
   2217   __ Bind(&loop_n);
   2218 
   2219   __ Ldr(vntmp_single,
   2220          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   2221   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   2222 
   2223   __ Mov(index_m, 0);
   2224   __ Bind(&loop_m);
   2225 
   2226   __ Ldr(vmtmp_single,
   2227          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
   2228   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
   2229 
   2230   __ Mov(vres, vd);
   2231   {
   2232     for (unsigned i = 0; i < indices_length; i++) {
   2233       {
   2234         SingleEmissionCheckScope guard(&masm);
   2235         (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
   2236       }
   2237       __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   2238     }
   2239   }
   2240 
   2241   __ Add(index_m, index_m, 1);
   2242   __ Cmp(index_m, inputs_m_length);
   2243   __ B(lo, &loop_m);
   2244 
   2245   __ Add(index_n, index_n, 1);
   2246   __ Cmp(index_n, inputs_n_length);
   2247   __ B(lo, &loop_n);
   2248 
   2249   END();
   2250   TRY_RUN(skipped);
   2251   TEARDOWN();
   2252 }
   2253 
   2254 
   2255 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   2256 // arrays of rawbit representation of input values. This ensures that
   2257 // exact bit comparisons can be performed.
   2258 template <typename Td, typename Tn, typename Tm>
   2259 static void TestByElementNEON(const char* name,
   2260                               TestByElementNEONHelper_t helper,
   2261                               const Td inputs_d[],
   2262                               const Tn inputs_n[],
   2263                               unsigned inputs_n_length,
   2264                               const Tm inputs_m[],
   2265                               unsigned inputs_m_length,
   2266                               const int indices[],
   2267                               unsigned indices_length,
   2268                               const Td expected[],
   2269                               unsigned expected_length,
   2270                               VectorFormat vd_form,
   2271                               VectorFormat vn_form,
   2272                               VectorFormat vm_form,
   2273                               unsigned vm_subvector_count = 1) {
   2274   VIXL_ASSERT(inputs_n_length > 0);
   2275   VIXL_ASSERT(inputs_m_length > 0);
   2276   VIXL_ASSERT(indices_length > 0);
   2277 
   2278   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
   2279 
   2280   const unsigned results_length =
   2281       inputs_n_length * inputs_m_length * indices_length;
   2282   Td* results = new Td[results_length * vd_lane_count];
   2283   const unsigned lane_bit = sizeof(Td) * 8;
   2284   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
   2285 
   2286   bool skipped;
   2287 
   2288   TestByElementNEON_Helper(helper,
   2289                            reinterpret_cast<uintptr_t>(inputs_d),
   2290                            reinterpret_cast<uintptr_t>(inputs_n),
   2291                            inputs_n_length,
   2292                            reinterpret_cast<uintptr_t>(inputs_m),
   2293                            inputs_m_length,
   2294                            indices,
   2295                            indices_length,
   2296                            reinterpret_cast<uintptr_t>(results),
   2297                            vd_form,
   2298                            vn_form,
   2299                            vm_form,
   2300                            vm_subvector_count,
   2301                            &skipped);
   2302 
   2303   if (Test::generate_test_trace()) {
   2304     // Print the results.
   2305     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2306     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2307       printf(" ");
   2308       // Output a separate result for each element of the result vector.
   2309       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2310         unsigned index = lane + (iteration * vd_lane_count);
   2311         printf(" 0x%0*" PRIx64 ",",
   2312                lane_len_in_hex,
   2313                static_cast<uint64_t>(results[index]));
   2314       }
   2315       printf("\n");
   2316     }
   2317 
   2318     printf("};\n");
   2319     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2320            name,
   2321            results_length);
   2322   } else if (!skipped) {
   2323     // Check the results.
   2324     VIXL_CHECK(expected_length == results_length);
   2325     unsigned error_count = 0;
   2326     unsigned d = 0;
   2327     const char* padding = "                    ";
   2328     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2329     for (unsigned n = 0; n < inputs_n_length; n++) {
   2330       for (unsigned m = 0; m < inputs_m_length; m++) {
   2331         for (unsigned index = 0; index < indices_length; index++, d++) {
   2332           bool error_in_vector = false;
   2333 
   2334           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2335             unsigned output_index =
   2336                 (n * inputs_m_length * indices_length * vd_lane_count) +
   2337                 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
   2338                 lane;
   2339 
   2340             if (results[output_index] != expected[output_index]) {
   2341               error_in_vector = true;
   2342               break;
   2343             }
   2344           }
   2345 
   2346           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2347             printf("%s\n", name);
   2348             printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
   2349                    lane_len_in_hex + 1,
   2350                    padding,
   2351                    lane_len_in_hex + 1,
   2352                    padding,
   2353                    lane_len_in_hex + 1,
   2354                    padding,
   2355                    lane_len_in_hex + 1,
   2356                    padding);
   2357 
   2358             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2359               unsigned output_index =
   2360                   (n * inputs_m_length * indices_length * vd_lane_count) +
   2361                   (m * indices_length * vd_lane_count) +
   2362                   (index * vd_lane_count) + lane;
   2363               unsigned input_index_n =
   2364                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
   2365                   inputs_n_length;
   2366               unsigned input_index_m =
   2367                   (inputs_m_length - vd_lane_count + m + 1 + lane) %
   2368                   inputs_m_length;
   2369 
   2370               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
   2371                      " "
   2372                      "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2373                      results[output_index] != expected[output_index] ? '*'
   2374                                                                      : ' ',
   2375                      lane_len_in_hex,
   2376                      static_cast<uint64_t>(inputs_d[lane]),
   2377                      lane_len_in_hex,
   2378                      static_cast<uint64_t>(inputs_n[input_index_n]),
   2379                      lane_len_in_hex,
   2380                      static_cast<uint64_t>(inputs_m[input_index_m]),
   2381                      indices[index],
   2382                      lane_len_in_hex,
   2383                      static_cast<uint64_t>(results[output_index]),
   2384                      lane_len_in_hex,
   2385                      static_cast<uint64_t>(expected[output_index]));
   2386             }
   2387           }
   2388         }
   2389       }
   2390     }
   2391     VIXL_ASSERT(d == expected_length);
   2392     if (error_count > kErrorReportLimit) {
   2393       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2394     }
   2395     VIXL_CHECK(error_count == 0);
   2396   }
   2397   delete[] results;
   2398 }
   2399 
   2400 
   2401 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
   2402 
   2403 
   2404 template <typename Tm>
   2405 void Test2OpImmNEON_Helper(
   2406     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
   2407     uintptr_t inputs_n,
   2408     unsigned inputs_n_length,
   2409     const Tm inputs_m[],
   2410     unsigned inputs_m_length,
   2411     uintptr_t results,
   2412     VectorFormat vd_form,
   2413     VectorFormat vn_form,
   2414     bool* skipped) {
   2415   VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
   2416 
   2417   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
   2418                       CPUFeatures::kFP,
   2419                       CPUFeatures::kNEONHalf);
   2420   START();
   2421 
   2422   // Roll up the loop to keep the code size down.
   2423   Label loop_n;
   2424 
   2425   Register out = x0;
   2426   Register inputs_n_base = x1;
   2427   Register inputs_n_last_16bytes = x3;
   2428   Register index_n = x5;
   2429 
   2430   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   2431   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   2432   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2433 
   2434   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   2435   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2436   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2437   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   2438   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   2439 
   2440 
   2441   // These will be either a D- or a Q-register form, with a single lane
   2442   // (for use in scalar load and store operations).
   2443   VRegister vd = VRegister(0, vd_bits);
   2444   VRegister vn = v1.V16B();
   2445   VRegister vntmp = v3.V16B();
   2446 
   2447   // These will have the correct format for use when calling 'helper'.
   2448   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
   2449   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   2450 
   2451   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   2452   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   2453 
   2454   __ Mov(out, results);
   2455 
   2456   __ Mov(inputs_n_base, inputs_n);
   2457   __ Mov(inputs_n_last_16bytes,
   2458          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
   2459 
   2460   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   2461 
   2462   __ Mov(index_n, 0);
   2463   __ Bind(&loop_n);
   2464 
   2465   __ Ldr(vntmp_single,
   2466          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   2467   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   2468 
   2469   // Set the destination to zero for tests such as '[r]shrn2'.
   2470   // TODO: Setting the destination to values other than zero might be a better
   2471   //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
   2472   __ Movi(vd.V16B(), 0);
   2473 
   2474   {
   2475     for (unsigned i = 0; i < inputs_m_length; i++) {
   2476       {
   2477         SingleEmissionCheckScope guard(&masm);
   2478         (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
   2479       }
   2480       __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   2481     }
   2482   }
   2483 
   2484   __ Add(index_n, index_n, 1);
   2485   __ Cmp(index_n, inputs_n_length);
   2486   __ B(lo, &loop_n);
   2487 
   2488   END();
   2489   TRY_RUN(skipped);
   2490   TEARDOWN();
   2491 }
   2492 
   2493 
   2494 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   2495 // arrays of rawbit representation of input values. This ensures that
   2496 // exact bit comparisons can be performed.
   2497 template <typename Td, typename Tn, typename Tm>
   2498 static void Test2OpImmNEON(
   2499     const char* name,
   2500     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
   2501     const Tn inputs_n[],
   2502     unsigned inputs_n_length,
   2503     const Tm inputs_m[],
   2504     unsigned inputs_m_length,
   2505     const Td expected[],
   2506     unsigned expected_length,
   2507     VectorFormat vd_form,
   2508     VectorFormat vn_form) {
   2509   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
   2510 
   2511   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2512   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2513   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2514 
   2515   const unsigned results_length = inputs_n_length * inputs_m_length;
   2516   Td* results = new Td[results_length * vd_lane_count];
   2517   const unsigned lane_bit = sizeof(Td) * 8;
   2518   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   2519 
   2520   bool skipped;
   2521 
   2522   Test2OpImmNEON_Helper(helper,
   2523                         reinterpret_cast<uintptr_t>(inputs_n),
   2524                         inputs_n_length,
   2525                         inputs_m,
   2526                         inputs_m_length,
   2527                         reinterpret_cast<uintptr_t>(results),
   2528                         vd_form,
   2529                         vn_form,
   2530                         &skipped);
   2531 
   2532   if (Test::generate_test_trace()) {
   2533     // Print the results.
   2534     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2535     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2536       printf(" ");
   2537       // Output a separate result for each element of the result vector.
   2538       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2539         unsigned index = lane + (iteration * vd_lane_count);
   2540         printf(" 0x%0*" PRIx64 ",",
   2541                lane_len_in_hex,
   2542                static_cast<uint64_t>(results[index]));
   2543       }
   2544       printf("\n");
   2545     }
   2546 
   2547     printf("};\n");
   2548     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2549            name,
   2550            results_length);
   2551   } else if (!skipped) {
   2552     // Check the results.
   2553     VIXL_CHECK(expected_length == results_length);
   2554     unsigned error_count = 0;
   2555     unsigned d = 0;
   2556     const char* padding = "                    ";
   2557     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2558     for (unsigned n = 0; n < inputs_n_length; n++) {
   2559       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
   2560         bool error_in_vector = false;
   2561 
   2562         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2563           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   2564                                   (m * vd_lane_count) + lane;
   2565 
   2566           if (results[output_index] != expected[output_index]) {
   2567             error_in_vector = true;
   2568             break;
   2569           }
   2570         }
   2571 
   2572         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2573           printf("%s\n", name);
   2574           printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
   2575                  lane_len_in_hex + 1,
   2576                  padding,
   2577                  lane_len_in_hex,
   2578                  padding,
   2579                  lane_len_in_hex + 1,
   2580                  padding);
   2581 
   2582           const unsigned first_index_n =
   2583               inputs_n_length - (16 / vn_lane_bytes) + n + 1;
   2584 
   2585           for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
   2586                lane++) {
   2587             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   2588                                     (m * vd_lane_count) + lane;
   2589             unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
   2590             unsigned input_index_m = m;
   2591 
   2592             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
   2593                    " "
   2594                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2595                    results[output_index] != expected[output_index] ? '*' : ' ',
   2596                    lane_len_in_hex,
   2597                    static_cast<uint64_t>(inputs_n[input_index_n]),
   2598                    lane_len_in_hex,
   2599                    static_cast<uint64_t>(inputs_m[input_index_m]),
   2600                    lane_len_in_hex,
   2601                    static_cast<uint64_t>(results[output_index]),
   2602                    lane_len_in_hex,
   2603                    static_cast<uint64_t>(expected[output_index]));
   2604           }
   2605         }
   2606       }
   2607     }
   2608     VIXL_ASSERT(d == expected_length);
   2609     if (error_count > kErrorReportLimit) {
   2610       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2611     }
   2612     VIXL_CHECK(error_count == 0);
   2613   }
   2614   delete[] results;
   2615 }
   2616 
   2617 
   2618 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
   2619 
   2620 
   2621 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
   2622                                       uintptr_t inputs_d,
   2623                                       const int inputs_imm1[],
   2624                                       unsigned inputs_imm1_length,
   2625                                       uintptr_t inputs_n,
   2626                                       unsigned inputs_n_length,
   2627                                       const int inputs_imm2[],
   2628                                       unsigned inputs_imm2_length,
   2629                                       uintptr_t results,
   2630                                       VectorFormat vd_form,
   2631                                       VectorFormat vn_form,
   2632                                       bool* skipped) {
   2633   VIXL_ASSERT(vd_form != kFormatUndefined);
   2634   VIXL_ASSERT(vn_form != kFormatUndefined);
   2635 
   2636   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
   2637   START();
   2638 
   2639   // Roll up the loop to keep the code size down.
   2640   Label loop_n;
   2641 
   2642   Register out = x0;
   2643   Register inputs_d_base = x1;
   2644   Register inputs_n_base = x2;
   2645   Register inputs_n_last_vector = x4;
   2646   Register index_n = x6;
   2647 
   2648   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   2649   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   2650   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2651 
   2652   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   2653   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2654   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2655   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   2656   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   2657 
   2658 
   2659   // These will be either a D- or a Q-register form, with a single lane
   2660   // (for use in scalar load and store operations).
   2661   VRegister vd = VRegister(0, vd_bits);
   2662   VRegister vn = VRegister(1, vn_bits);
   2663   VRegister vntmp = VRegister(4, vn_bits);
   2664   VRegister vres = VRegister(5, vn_bits);
   2665 
   2666   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   2667   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   2668 
   2669   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   2670   VRegister vntmp_single = VRegister(4, vn_lane_bits);
   2671 
   2672   // Same registers for use in the 'ext' instructions.
   2673   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
   2674   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
   2675 
   2676   __ Mov(out, results);
   2677 
   2678   __ Mov(inputs_d_base, inputs_d);
   2679 
   2680   __ Mov(inputs_n_base, inputs_n);
   2681   __ Mov(inputs_n_last_vector,
   2682          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
   2683 
   2684   __ Ldr(vd, MemOperand(inputs_d_base));
   2685 
   2686   __ Ldr(vn, MemOperand(inputs_n_last_vector));
   2687 
   2688   __ Mov(index_n, 0);
   2689   __ Bind(&loop_n);
   2690 
   2691   __ Ldr(vntmp_single,
   2692          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   2693   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
   2694 
   2695   {
   2696     EmissionCheckScope guard(&masm,
   2697                              kInstructionSize * inputs_imm1_length *
   2698                                  inputs_imm2_length * 3);
   2699     for (unsigned i = 0; i < inputs_imm1_length; i++) {
   2700       for (unsigned j = 0; j < inputs_imm2_length; j++) {
   2701         __ Mov(vres, vd);
   2702         (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
   2703         __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   2704       }
   2705     }
   2706   }
   2707 
   2708   __ Add(index_n, index_n, 1);
   2709   __ Cmp(index_n, inputs_n_length);
   2710   __ B(lo, &loop_n);
   2711 
   2712   END();
   2713   TRY_RUN(skipped);
   2714   TEARDOWN();
   2715 }
   2716 
   2717 
   2718 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   2719 // arrays of rawbit representation of input values. This ensures that
   2720 // exact bit comparisons can be performed.
   2721 template <typename Td, typename Tn>
   2722 static void TestOpImmOpImmNEON(const char* name,
   2723                                TestOpImmOpImmVdUpdateNEONHelper_t helper,
   2724                                const Td inputs_d[],
   2725                                const int inputs_imm1[],
   2726                                unsigned inputs_imm1_length,
   2727                                const Tn inputs_n[],
   2728                                unsigned inputs_n_length,
   2729                                const int inputs_imm2[],
   2730                                unsigned inputs_imm2_length,
   2731                                const Td expected[],
   2732                                unsigned expected_length,
   2733                                VectorFormat vd_form,
   2734                                VectorFormat vn_form) {
   2735   VIXL_ASSERT(inputs_n_length > 0);
   2736   VIXL_ASSERT(inputs_imm1_length > 0);
   2737   VIXL_ASSERT(inputs_imm2_length > 0);
   2738 
   2739   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2740 
   2741   const unsigned results_length =
   2742       inputs_n_length * inputs_imm1_length * inputs_imm2_length;
   2743 
   2744   Td* results = new Td[results_length * vd_lane_count];
   2745   const unsigned lane_bit = sizeof(Td) * 8;
   2746   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   2747 
   2748   bool skipped;
   2749 
   2750   TestOpImmOpImmNEON_Helper(helper,
   2751                             reinterpret_cast<uintptr_t>(inputs_d),
   2752                             inputs_imm1,
   2753                             inputs_imm1_length,
   2754                             reinterpret_cast<uintptr_t>(inputs_n),
   2755                             inputs_n_length,
   2756                             inputs_imm2,
   2757                             inputs_imm2_length,
   2758                             reinterpret_cast<uintptr_t>(results),
   2759                             vd_form,
   2760                             vn_form,
   2761                             &skipped);
   2762 
   2763   if (Test::generate_test_trace()) {
   2764     // Print the results.
   2765     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2766     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2767       printf(" ");
   2768       // Output a separate result for each element of the result vector.
   2769       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2770         unsigned index = lane + (iteration * vd_lane_count);
   2771         printf(" 0x%0*" PRIx64 ",",
   2772                lane_len_in_hex,
   2773                static_cast<uint64_t>(results[index]));
   2774       }
   2775       printf("\n");
   2776     }
   2777 
   2778     printf("};\n");
   2779     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2780            name,
   2781            results_length);
   2782   } else if (!skipped) {
   2783     // Check the results.
   2784     VIXL_CHECK(expected_length == results_length);
   2785     unsigned error_count = 0;
   2786     unsigned counted_length = 0;
   2787     const char* padding = "                    ";
   2788     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2789     for (unsigned n = 0; n < inputs_n_length; n++) {
   2790       for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
   2791         for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
   2792           bool error_in_vector = false;
   2793 
   2794           counted_length++;
   2795 
   2796           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2797             unsigned output_index =
   2798                 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
   2799                 (imm1 * inputs_imm2_length * vd_lane_count) +
   2800                 (imm2 * vd_lane_count) + lane;
   2801 
   2802             if (results[output_index] != expected[output_index]) {
   2803               error_in_vector = true;
   2804               break;
   2805             }
   2806           }
   2807 
   2808           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2809             printf("%s\n", name);
   2810             printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
   2811                    lane_len_in_hex + 1,
   2812                    padding,
   2813                    lane_len_in_hex,
   2814                    padding,
   2815                    lane_len_in_hex + 1,
   2816                    padding,
   2817                    lane_len_in_hex,
   2818                    padding,
   2819                    lane_len_in_hex + 1,
   2820                    padding);
   2821 
   2822             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2823               unsigned output_index =
   2824                   (n * inputs_imm1_length * inputs_imm2_length *
   2825                    vd_lane_count) +
   2826                   (imm1 * inputs_imm2_length * vd_lane_count) +
   2827                   (imm2 * vd_lane_count) + lane;
   2828               unsigned input_index_n =
   2829                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
   2830                   inputs_n_length;
   2831               unsigned input_index_imm1 = imm1;
   2832               unsigned input_index_imm2 = imm2;
   2833 
   2834               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
   2835                      " "
   2836                      "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2837                      results[output_index] != expected[output_index] ? '*'
   2838                                                                      : ' ',
   2839                      lane_len_in_hex,
   2840                      static_cast<uint64_t>(inputs_d[lane]),
   2841                      lane_len_in_hex,
   2842                      static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
   2843                      lane_len_in_hex,
   2844                      static_cast<uint64_t>(inputs_n[input_index_n]),
   2845                      lane_len_in_hex,
   2846                      static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
   2847                      lane_len_in_hex,
   2848                      static_cast<uint64_t>(results[output_index]),
   2849                      lane_len_in_hex,
   2850                      static_cast<uint64_t>(expected[output_index]));
   2851             }
   2852           }
   2853         }
   2854       }
   2855     }
   2856     VIXL_ASSERT(counted_length == expected_length);
   2857     if (error_count > kErrorReportLimit) {
   2858       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2859     }
   2860     VIXL_CHECK(error_count == 0);
   2861   }
   2862   delete[] results;
   2863 }
   2864 
   2865 
   2866 // ==== Floating-point tests. ====
   2867 
   2868 
   2869 // Standard floating-point test expansion for double-, single- and (in the
   2870 // _FP16 variant) half-precision operations.
        //
        // STRINGIFY(s) turns its argument into a string literal.
   2871 #define STRINGIFY(s) #s
   2872 
        // Expands to a call of Test<type>() for one mnemonic/variant pair:
        //   - the test name is the string "<mnemonic>_<variant>",
        //   - the instruction is emitted through MacroAssembler::<mnemonic>,
        //   - 'input' must be an array (its element count is taken with sizeof),
        //   - the reference data is kExpected_<mnemonic>_<variant>, with
        //     kExpectedCount_<mnemonic>_<variant> entries.
   2873 #define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
   2874   Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),    \
   2875              &MacroAssembler::mnemonic,                     \
   2876              input,                                         \
   2877              sizeof(input) / sizeof(input[0]),              \
   2878              kExpected_##mnemonic##_##variant,              \
   2879              kExpectedCount_##mnemonic##_##variant)
   2880 
        // Defines TEST(<mnemonic>_d) and TEST(<mnemonic>_s), which run the
        // instruction on kInputDouble<input> and kInputFloat<input> respectively.
   2881 #define DEFINE_TEST_FP(mnemonic, type, input)                    \
   2882   TEST(mnemonic##_d) {                                           \
   2883     CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
   2884   }                                                              \
   2885   TEST(mnemonic##_s) {                                           \
   2886     CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);  \
   2887   }
   2888 
        // As DEFINE_TEST_FP, plus TEST(<mnemonic>_h), which runs the instruction
        // on kInputFloat16<input>.
   2889 #define DEFINE_TEST_FP_FP16(mnemonic, type, input)                \
   2890   TEST(mnemonic##_d) {                                            \
   2891     CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);  \
   2892   }                                                               \
   2893   TEST(mnemonic##_s) {                                            \
   2894     CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);   \
   2895   }                                                               \
   2896   TEST(mnemonic##_h) {                                            \
   2897     CALL_TEST_FP_HELPER(mnemonic, h, type, kInputFloat16##input); \
   2898   }
   2899 
   2900 
        // Three-operand tests (expanded through Test3Op), basic inputs.
        //
   2901 // TODO: Test with a newer version of valgrind.
   2902 //
   2903 // Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
   2904 // Therefore this test will be exiting through an ASSERT and thus leaking
   2905 // memory.
   2906 DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
   2907 DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
   2908 DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
   2909 DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)
   2910 
        // Two-operand tests (expanded through Test2Op), basic inputs.
   2911 DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
   2912 DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
   2913 DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
   2914 DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
   2915 DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
   2916 DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
   2917 DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
   2918 DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
   2919 DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)
   2920 
        // One-operand tests (expanded through Test1Op). The frint* tests use the
        // conversion inputs; the others use the basic inputs.
   2921 DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
   2922 DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
   2923 DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
   2924 DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
   2925 DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
   2926 DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
   2927 DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
   2928 DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
   2929 DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
   2930 DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
   2931 DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)
   2932 
        // Comparison tests, expanded through TestCmp and TestCmpZero. Only
        // double- and single-precision variants are defined here.
   2933 TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
   2934 TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
   2935 TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
   2936 TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }
   2937 
        // Precision conversions: fcvt_sd is fed the double conversion inputs and
        // fcvt_ds the float conversion inputs.
   2938 TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
   2939 TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
   2940 
   // Defines six FP-to-integer tests for `op`, in this order: <op>_xd, <op>_xs,
   // <op>_xh, <op>_wd, <op>_ws, <op>_wh. The second letter of the suffix picks
   // the input array (d -> kInputDouble<inputs>, s -> kInputFloat<inputs>,
   // h -> kInputFloat16<inputs>); the first letter is presumably the width of the
   // integer destination register (x/w) -- confirm against CALL_TEST_FP_HELPER.
   #define DEFINE_TEST_FP_TO_INT(op, helper, inputs) \
     TEST(op##_xd) { CALL_TEST_FP_HELPER(op, xd, helper, kInputDouble##inputs); } \
     TEST(op##_xs) { CALL_TEST_FP_HELPER(op, xs, helper, kInputFloat##inputs); } \
     TEST(op##_xh) { CALL_TEST_FP_HELPER(op, xh, helper, kInputFloat16##inputs); } \
     TEST(op##_wd) { CALL_TEST_FP_HELPER(op, wd, helper, kInputDouble##inputs); } \
     TEST(op##_ws) { CALL_TEST_FP_HELPER(op, ws, helper, kInputFloat##inputs); } \
     TEST(op##_wh) { CALL_TEST_FP_HELPER(op, wh, helper, kInputFloat16##inputs); }
   2960 
   // FP-to-integer conversion tests over the *Conversions input arrays; each line
   // defines the _xd, _xs, _xh, _wd, _ws and _wh variants. The *s mnemonics use a
   // signed helper type and the *u mnemonics an unsigned one. Note that
   // fcvtzs/fcvtzu use FPToFixedS/FPToFixedU rather than FPToS/FPToU.
   2961 DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
   2962 DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
   2963 DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
   2964 DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
   2965 DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
   2966 DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
   2967 DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
   2968 DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
   2969 
   // Defines a single test, <op>_wd, which forwards to CALL_TEST_FP_HELPER with
   // the `wd` suffix and the kInputDouble<inputs> array.
   #define DEFINE_TEST_FP_TO_JS_INT(op, helper, inputs) \
     TEST(op##_wd) { CALL_TEST_FP_HELPER(op, wd, helper, kInputDouble##inputs); }
   2974 
   // Defines fjcvtzs_wd: FPToS helper type, kInputDoubleConversions inputs.
   2975 DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)
   2976 
   2977 // TODO: Scvtf-fixed-point
   2978 // TODO: Scvtf-integer
   2979 // TODO: Ucvtf-fixed-point
   2980 // TODO: Ucvtf-integer
   2981 
   2982 // TODO: Fccmp
   2983 // TODO: Fcsel
   2984 
   2985 
   2986 // ==== NEON Tests. ====
   2987 
   // Calls Test1OpNEON for instruction `op` with formats `vd` and `vn`:
   //   - test name:        "<op>_<vd>"
   //   - instruction:      &MacroAssembler::<op>
   //   - inputs:           `src_n` and its element count (taken with sizeof, so
   //                       `src_n` must name an array, not a pointer)
   //   - expected results: kExpected_NEON_<op>_<vd> and
   //                       kExpectedCount_NEON_<op>_<vd>
   //   - formats:          kFormat<vd>, kFormat<vn>
   #define CALL_TEST_NEON_HELPER_1Op(op, vd, vn, src_n) \
     Test1OpNEON(STRINGIFY(op) "_" STRINGIFY(vd), \
                 &MacroAssembler::op, \
                 src_n, (sizeof(src_n) / sizeof(src_n[0])), \
                 kExpected_NEON_##op##_##vd, \
                 kExpectedCount_NEON_##op##_##vd, \
                 kFormat##vd, kFormat##vn)
   2997 
   // Calls Test1OpAcrossNEON for instruction `op`. Unlike the plain 1Op helper,
   // both formats take part in the naming: the test name is "<op>_<vd>_<vn>" and
   // the expected tables are kExpected_NEON_<op>_<vd>_<vn> and
   // kExpectedCount_NEON_<op>_<vd>_<vn>. `src_n` must name an array because its
   // element count is taken with sizeof.
   #define CALL_TEST_NEON_HELPER_1OpAcross(op, vd, vn, src_n) \
     Test1OpAcrossNEON(STRINGIFY(op) "_" STRINGIFY(vd) "_" STRINGIFY(vn), \
                       &MacroAssembler::op, \
                       src_n, (sizeof(src_n) / sizeof(src_n[0])), \
                       kExpected_NEON_##op##_##vd##_##vn, \
                       kExpectedCount_NEON_##op##_##vd##_##vn, \
                       kFormat##vd, kFormat##vn)
   3008 
   // Calls Test2OpNEON for instruction `op`:
   //   - test name:        "<op>_<vd>"
   //   - instruction:      &MacroAssembler::<op>
   //   - inputs:           `src_d`, then `src_n` and `src_m` each followed by its
   //                       element count (sizeof-based, so both must be arrays)
   //   - expected results: kExpected_NEON_<op>_<vd> and
   //                       kExpectedCount_NEON_<op>_<vd>
   //   - formats:          kFormat<vd>, kFormat<vn>, kFormat<vm>
   #define CALL_TEST_NEON_HELPER_2Op(op, vd, vn, vm, src_d, src_n, src_m) \
     Test2OpNEON(STRINGIFY(op) "_" STRINGIFY(vd), \
                 &MacroAssembler::op, \
                 src_d, \
                 src_n, (sizeof(src_n) / sizeof(src_n[0])), \
                 src_m, (sizeof(src_m) / sizeof(src_m[0])), \
                 kExpected_NEON_##op##_##vd, \
                 kExpectedCount_NEON_##op##_##vd, \
                 kFormat##vd, kFormat##vn, kFormat##vm)
   3023 
   // Calls Test2OpImmNEON for instruction `op`. The test name and the expected
   // tables carry a "_2OPIMM" suffix: "<op>_<vd>_2OPIMM",
   // kExpected_NEON_<op>_<vd>_2OPIMM and kExpectedCount_NEON_<op>_<vd>_2OPIMM.
   // `src_n` and `src_m` must name arrays; their element counts are taken with
   // sizeof.
   #define CALL_TEST_NEON_HELPER_2OpImm(op, vd, vn, src_n, src_m) \
     Test2OpImmNEON(STRINGIFY(op) "_" STRINGIFY(vd) "_2OPIMM", \
                    &MacroAssembler::op, \
                    src_n, (sizeof(src_n) / sizeof(src_n[0])), \
                    src_m, (sizeof(src_m) / sizeof(src_m[0])), \
                    kExpected_NEON_##op##_##vd##_2OPIMM, \
                    kExpectedCount_NEON_##op##_##vd##_2OPIMM, \
                    kFormat##vd, kFormat##vn)
   3036 
   // Calls TestByElementNEON for instruction `op`. All three formats appear in
   // the test name ("<op>_<vd>_<vn>_<vm>") and in the expected-table names
   // (kExpected_NEON_<op>_<vd>_<vn>_<vm> / kExpectedCount_NEON_...). `src_n`,
   // `src_m` and `idx` must name arrays; each is passed together with its
   // sizeof-derived element count.
   #define CALL_TEST_NEON_HELPER_ByElement( \
       op, vd, vn, vm, src_d, src_n, src_m, idx) \
     TestByElementNEON( \
         STRINGIFY(op) "_" STRINGIFY(vd) "_" STRINGIFY(vn) "_" STRINGIFY(vm), \
         &MacroAssembler::op, \
         src_d, \
         src_n, (sizeof(src_n) / sizeof(src_n[0])), \
         src_m, (sizeof(src_m) / sizeof(src_m[0])), \
         idx, (sizeof(idx) / sizeof(idx[0])), \
         kExpected_NEON_##op##_##vd##_##vn##_##vm, \
         kExpectedCount_NEON_##op##_##vd##_##vn##_##vm, \
         kFormat##vd, kFormat##vn, kFormat##vm)
   3055 
   // Same call as CALL_TEST_NEON_HELPER_ByElement, with one extra trailing
   // argument, `subvectors`, forwarded to TestByElementNEON after the three
   // kFormat values. Test name and expected tables are again keyed on
   // <op>_<vd>_<vn>_<vm>; `src_n`, `src_m` and `idx` must name arrays.
   #define CALL_TEST_NEON_HELPER_ByElement_Dot_Product( \
       op, vd, vn, vm, src_d, src_n, src_m, idx, subvectors) \
     TestByElementNEON( \
         STRINGIFY(op) "_" STRINGIFY(vd) "_" STRINGIFY(vn) "_" STRINGIFY(vm), \
         &MacroAssembler::op, \
         src_d, \
         src_n, (sizeof(src_n) / sizeof(src_n[0])), \
         src_m, (sizeof(src_m) / sizeof(src_m[0])), \
         idx, (sizeof(idx) / sizeof(idx[0])), \
         kExpected_NEON_##op##_##vd##_##vn##_##vm, \
         kExpectedCount_NEON_##op##_##vd##_##vn##_##vm, \
         kFormat##vd, kFormat##vn, kFormat##vm, \
         subvectors)
   3082 
   // Calls TestOpImmOpImmNEON. Unlike the other NEON call macros, the second
   // argument is the caller-supplied `helper_fn` rather than
   // &MacroAssembler::<op>; `op` is used only to build the test name
   // ("<op>_<vd>") and the expected-table names (kExpected_NEON_<op>_<vd>,
   // kExpectedCount_NEON_<op>_<vd>). `imm1`, `src_n` and `imm2` must name
   // arrays; each is followed by its sizeof-derived element count.
   #define CALL_TEST_NEON_HELPER_OpImmOpImm( \
       helper_fn, op, vd, vn, src_d, imm1, src_n, imm2) \
     TestOpImmOpImmNEON(STRINGIFY(op) "_" STRINGIFY(vd), \
                        helper_fn, \
                        src_d, \
                        imm1, (sizeof(imm1) / sizeof(imm1[0])), \
                        src_n, (sizeof(src_n) / sizeof(src_n[0])), \
                        imm2, (sizeof(imm2) / sizeof(imm2[0])), \
                        kExpected_NEON_##op##_##vd, \
                        kExpectedCount_NEON_##op##_##vd, \
                        kFormat##vd, kFormat##vn)
   3104 
   // One-operand NEON call in which destination and source share the same
   // format: forwards to CALL_TEST_NEON_HELPER_1Op with `fmt` used for both.
   #define CALL_TEST_NEON_HELPER_2SAME(op, fmt, src) \
     CALL_TEST_NEON_HELPER_1Op(op, fmt, fmt, src)
   3107 
   // Defines <op>_8B and <op>_16B, both reading kInput8bits<set>.
   #define DEFINE_TEST_NEON_2SAME_8B_16B(op, set) \
     TEST(op##_8B) { CALL_TEST_NEON_HELPER_2SAME(op, 8B, kInput8bits##set); } \
     TEST(op##_16B) { CALL_TEST_NEON_HELPER_2SAME(op, 16B, kInput8bits##set); }
   3115 
   // Defines <op>_4H and <op>_8H, both reading kInput16bits<set>.
   #define DEFINE_TEST_NEON_2SAME_4H_8H(op, set) \
     TEST(op##_4H) { CALL_TEST_NEON_HELPER_2SAME(op, 4H, kInput16bits##set); } \
     TEST(op##_8H) { CALL_TEST_NEON_HELPER_2SAME(op, 8H, kInput16bits##set); }
   3123 
   // Defines <op>_2S and <op>_4S, both reading kInput32bits<set>.
   #define DEFINE_TEST_NEON_2SAME_2S_4S(op, set) \
     TEST(op##_2S) { CALL_TEST_NEON_HELPER_2SAME(op, 2S, kInput32bits##set); } \
     TEST(op##_4S) { CALL_TEST_NEON_HELPER_2SAME(op, 4S, kInput32bits##set); }
   3131 
   // Byte and halfword arrangements only: <op>_8B, <op>_16B, <op>_4H, <op>_8H.
   #define DEFINE_TEST_NEON_2SAME_BH(op, set) \
     DEFINE_TEST_NEON_2SAME_8B_16B(op, set) \
     DEFINE_TEST_NEON_2SAME_4H_8H(op, set)
   3135 
   // Every integer arrangement except 2D, defined in this order: 8B, 16B, 4H,
   // 8H, 2S, 4S. (Equivalent to DEFINE_TEST_NEON_2SAME_BH followed by
   // DEFINE_TEST_NEON_2SAME_2S_4S.)
   #define DEFINE_TEST_NEON_2SAME_NO2D(op, set) \
     DEFINE_TEST_NEON_2SAME_8B_16B(op, set) \
     DEFINE_TEST_NEON_2SAME_4H_8H(op, set) \
     DEFINE_TEST_NEON_2SAME_2S_4S(op, set)
   3139 
   // All integer arrangements: everything from DEFINE_TEST_NEON_2SAME_NO2D, then
   // <op>_2D reading kInput64bits<set>.
   #define DEFINE_TEST_NEON_2SAME(op, set) \
     DEFINE_TEST_NEON_2SAME_NO2D(op, set) \
     TEST(op##_2D) { CALL_TEST_NEON_HELPER_2SAME(op, 2D, kInput64bits##set); }
   // Word and doubleword arrangements only: <op>_2S and <op>_4S (via
   // DEFINE_TEST_NEON_2SAME_2S_4S), then <op>_2D reading kInput64bits<set>.
   #define DEFINE_TEST_NEON_2SAME_SD(op, set) \
     DEFINE_TEST_NEON_2SAME_2S_4S(op, set) \
     TEST(op##_2D) { CALL_TEST_NEON_HELPER_2SAME(op, 2D, kInput64bits##set); }
   3150 
   // Floating-point arrangements: <op>_2S and <op>_4S read kInputFloat<set>;
   // <op>_2D reads kInputDouble<set>.
   #define DEFINE_TEST_NEON_2SAME_FP(op, set) \
     TEST(op##_2S) { CALL_TEST_NEON_HELPER_2SAME(op, 2S, kInputFloat##set); } \
     TEST(op##_4S) { CALL_TEST_NEON_HELPER_2SAME(op, 4S, kInputFloat##set); } \
     TEST(op##_2D) { CALL_TEST_NEON_HELPER_2SAME(op, 2D, kInputDouble##set); }
   3161 
   // Everything from DEFINE_TEST_NEON_2SAME_FP, followed by the half-precision
   // arrangements <op>_4H and <op>_8H, which read kInputFloat16<set>.
   #define DEFINE_TEST_NEON_2SAME_FP_FP16(op, set) \
     DEFINE_TEST_NEON_2SAME_FP(op, set) \
     TEST(op##_4H) { CALL_TEST_NEON_HELPER_2SAME(op, 4H, kInputFloat16##set); } \
     TEST(op##_8H) { CALL_TEST_NEON_HELPER_2SAME(op, 8H, kInputFloat16##set); }
   3170 
   // Scalar floating-point forms, defined in the order H, S, D: <op>_H reads
   // kInputFloat16<set>, <op>_S reads kInputFloat<set>, <op>_D reads
   // kInputDouble<set>.
   #define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(op, set) \
     TEST(op##_H) { CALL_TEST_NEON_HELPER_2SAME(op, H, kInputFloat16##set); } \
     TEST(op##_S) { CALL_TEST_NEON_HELPER_2SAME(op, S, kInputFloat##set); } \
     TEST(op##_D) { CALL_TEST_NEON_HELPER_2SAME(op, D, kInputDouble##set); }
   3181 
   // Defines the scalar test <op>_B, reading kInput8bits<set>.
   #define DEFINE_TEST_NEON_2SAME_SCALAR_B(op, set) \
     TEST(op##_B) { CALL_TEST_NEON_HELPER_2SAME(op, B, kInput8bits##set); }
   // Defines the scalar test <op>_H, reading kInput16bits<set>.
   #define DEFINE_TEST_NEON_2SAME_SCALAR_H(op, set) \
     TEST(op##_H) { CALL_TEST_NEON_HELPER_2SAME(op, H, kInput16bits##set); }
   // Defines the scalar test <op>_S, reading kInput32bits<set>.
   #define DEFINE_TEST_NEON_2SAME_SCALAR_S(op, set) \
     TEST(op##_S) { CALL_TEST_NEON_HELPER_2SAME(op, S, kInput32bits##set); }
   // Defines the scalar test <op>_D, reading kInput64bits<set>.
   #define DEFINE_TEST_NEON_2SAME_SCALAR_D(op, set) \
     TEST(op##_D) { CALL_TEST_NEON_HELPER_2SAME(op, D, kInput64bits##set); }
   3198 
   // All four integer scalar sizes, defined in the order B, H, S, D.
   #define DEFINE_TEST_NEON_2SAME_SCALAR(op, set) \
     DEFINE_TEST_NEON_2SAME_SCALAR_B(op, set) \
     DEFINE_TEST_NEON_2SAME_SCALAR_H(op, set) \
     DEFINE_TEST_NEON_2SAME_SCALAR_S(op, set) \
     DEFINE_TEST_NEON_2SAME_SCALAR_D(op, set)
   3204 
   // Only the S and D integer scalar sizes, in that order.
   #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(op, set) \
     DEFINE_TEST_NEON_2SAME_SCALAR_S(op, set) \
     DEFINE_TEST_NEON_2SAME_SCALAR_D(op, set)
   3208 
   3209 
   // Alias used by the DEFINE_TEST_NEON_ACROSS* macros; forwards all four
   // arguments unchanged to CALL_TEST_NEON_HELPER_1OpAcross.
   #define CALL_TEST_NEON_HELPER_ACROSS(op, vd, vn, src_n) \
     CALL_TEST_NEON_HELPER_1OpAcross(op, vd, vn, src_n)
   3212 
   // Across-lanes tests whose scalar result has the same element size as the
   // source: B from 8B/16B (kInput8bits<set>), H from 4H/8H (kInput16bits<set>)
   // and S from 4S (kInput32bits<set>). Test names are <op>_<vd>_<vn>.
   #define DEFINE_TEST_NEON_ACROSS(op, set) \
     TEST(op##_B_8B) { CALL_TEST_NEON_HELPER_ACROSS(op, B, 8B, kInput8bits##set); } \
     TEST(op##_B_16B) { CALL_TEST_NEON_HELPER_ACROSS(op, B, 16B, kInput8bits##set); } \
     TEST(op##_H_4H) { CALL_TEST_NEON_HELPER_ACROSS(op, H, 4H, kInput16bits##set); } \
     TEST(op##_H_8H) { CALL_TEST_NEON_HELPER_ACROSS(op, H, 8H, kInput16bits##set); } \
     TEST(op##_S_4S) { CALL_TEST_NEON_HELPER_ACROSS(op, S, 4S, kInput32bits##set); }
   3229 
   // Across-lanes tests whose scalar result is one size wider than the source
   // elements: H from 8B/16B (kInput8bits<set>), S from 4H/8H (kInput16bits<set>)
   // and D from 4S (kInput32bits<set>). Test names are <op>_<vd>_<vn>.
   #define DEFINE_TEST_NEON_ACROSS_LONG(op, set) \
     TEST(op##_H_8B) { CALL_TEST_NEON_HELPER_ACROSS(op, H, 8B, kInput8bits##set); } \
     TEST(op##_H_16B) { CALL_TEST_NEON_HELPER_ACROSS(op, H, 16B, kInput8bits##set); } \
     TEST(op##_S_4H) { CALL_TEST_NEON_HELPER_ACROSS(op, S, 4H, kInput16bits##set); } \
     TEST(op##_S_8H) { CALL_TEST_NEON_HELPER_ACROSS(op, S, 8H, kInput16bits##set); } \
     TEST(op##_D_4S) { CALL_TEST_NEON_HELPER_ACROSS(op, D, 4S, kInput32bits##set); }
   3246 
   // Floating-point across-lanes tests: H from 4H/8H (kInputFloat16<set>) and S
   // from 4S (kInputFloat<set>). Test names are <op>_<vd>_<vn>.
   #define DEFINE_TEST_NEON_ACROSS_FP(op, set) \
     TEST(op##_H_4H) { CALL_TEST_NEON_HELPER_ACROSS(op, H, 4H, kInputFloat16##set); } \
     TEST(op##_H_8H) { CALL_TEST_NEON_HELPER_ACROSS(op, H, 8H, kInputFloat16##set); } \
     TEST(op##_S_4S) { CALL_TEST_NEON_HELPER_ACROSS(op, S, 4S, kInputFloat##set); }
   3257 
   // Alias used by the DEFINE_TEST_NEON_2DIFF* macros, where destination and
   // source formats differ; forwards all four arguments unchanged to
   // CALL_TEST_NEON_HELPER_1Op.
   #define CALL_TEST_NEON_HELPER_2DIFF(op, vd, vn, src_n) \
     CALL_TEST_NEON_HELPER_1Op(op, vd, vn, src_n)
   3260 
   // Tests named after the destination format, where the destination is wider
   // than the source. Destination <- source (input array):
   //   4H <- 8B, 8H <- 16B  (kInput8bits<set>)
   //   2S <- 4H, 4S <- 8H   (kInput16bits<set>)
   //   1D <- 2S, 2D <- 4S   (kInput32bits<set>)
   #define DEFINE_TEST_NEON_2DIFF_LONG(op, set) \
     TEST(op##_4H) { CALL_TEST_NEON_HELPER_2DIFF(op, 4H, 8B, kInput8bits##set); } \
     TEST(op##_8H) { CALL_TEST_NEON_HELPER_2DIFF(op, 8H, 16B, kInput8bits##set); } \
     TEST(op##_2S) { CALL_TEST_NEON_HELPER_2DIFF(op, 2S, 4H, kInput16bits##set); } \
     TEST(op##_4S) { CALL_TEST_NEON_HELPER_2DIFF(op, 4S, 8H, kInput16bits##set); } \
     TEST(op##_1D) { CALL_TEST_NEON_HELPER_2DIFF(op, 1D, 2S, kInput32bits##set); } \
     TEST(op##_2D) { CALL_TEST_NEON_HELPER_2DIFF(op, 2D, 4S, kInput32bits##set); }
   3280 
   // Narrowing tests for `op` and for its "2" variant (the mnemonic with a
   // literal 2 appended, e.g. <op>2). Destination <- source (input array):
   //   <op>:  8B <- 8H (kInput16bits<set>), 4H <- 4S (kInput32bits<set>),
   //          2S <- 2D (kInput64bits<set>)
   //   <op>2: 16B <- 8H, 8H <- 4S, 4S <- 2D (same input arrays)
   #define DEFINE_TEST_NEON_2DIFF_NARROW(op, set) \
     TEST(op##_8B) { CALL_TEST_NEON_HELPER_2DIFF(op, 8B, 8H, kInput16bits##set); } \
     TEST(op##_4H) { CALL_TEST_NEON_HELPER_2DIFF(op, 4H, 4S, kInput32bits##set); } \
     TEST(op##_2S) { CALL_TEST_NEON_HELPER_2DIFF(op, 2S, 2D, kInput64bits##set); } \
     TEST(op##2_16B) { \
       CALL_TEST_NEON_HELPER_2DIFF(op##2, 16B, 8H, kInput16bits##set); \
     } \
     TEST(op##2_8H) { \
       CALL_TEST_NEON_HELPER_2DIFF(op##2, 8H, 4S, kInput32bits##set); \
     } \
     TEST(op##2_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(op##2, 4S, 2D, kInput64bits##set); \
     }
   3300 
   // Floating-point lengthening tests for `op` and its "2" variant.
   // Destination <- source (input array):
   //   <op>:  4S <- 4H (kInputFloat16<set>), 2D <- 2S (kInputFloat<set>)
   //   <op>2: 4S <- 8H (kInputFloat16<set>), 2D <- 4S (kInputFloat<set>)
   #define DEFINE_TEST_NEON_2DIFF_FP_LONG(op, set) \
     TEST(op##_4S) { CALL_TEST_NEON_HELPER_2DIFF(op, 4S, 4H, kInputFloat16##set); } \
     TEST(op##_2D) { CALL_TEST_NEON_HELPER_2DIFF(op, 2D, 2S, kInputFloat##set); } \
     TEST(op##2_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(op##2, 4S, 8H, kInputFloat16##set); \
     } \
     TEST(op##2_2D) { \
       CALL_TEST_NEON_HELPER_2DIFF(op##2, 2D, 4S, kInputFloat##set); \
     }
   3314 
   // Floating-point narrowing tests for `op` and its "2" variant.
   // Destination <- source (input array):
   //   <op>:  4H <- 4S (kInputFloat<set>), 2S <- 2D (kInputDouble<set>)
   //   <op>2: 8H <- 4S (kInputFloat<set>), 4S <- 2D (kInputDouble<set>)
   #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(op, set) \
     TEST(op##_4H) { CALL_TEST_NEON_HELPER_2DIFF(op, 4H, 4S, kInputFloat##set); } \
     TEST(op##_2S) { CALL_TEST_NEON_HELPER_2DIFF(op, 2S, 2D, kInputDouble##set); } \
     TEST(op##2_8H) { \
       CALL_TEST_NEON_HELPER_2DIFF(op##2, 8H, 4S, kInputFloat##set); \
     } \
     TEST(op##2_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(op##2, 4S, 2D, kInputDouble##set); \
     }
   3328 
   // Subset of the FP narrowing tests limited to double-precision sources:
   // <op>_2S (2S <- 2D) and <op>2_4S (4S <- 2D), both reading kInputDouble<set>.
   #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(op, set) \
     TEST(op##_2S) { CALL_TEST_NEON_HELPER_2DIFF(op, 2S, 2D, kInputDouble##set); } \
     TEST(op##2_4S) { \
       CALL_TEST_NEON_HELPER_2DIFF(op##2, 4S, 2D, kInputDouble##set); \
     }
   3336 
   // Scalar narrowing tests, named after the destination size.
   // Destination <- source (input array):
   //   B <- H (kInput16bits<set>), H <- S (kInput32bits<set>),
   //   S <- D (kInput64bits<set>)
   #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(op, set) \
     TEST(op##_B) { CALL_TEST_NEON_HELPER_2DIFF(op, B, H, kInput16bits##set); } \
     TEST(op##_H) { CALL_TEST_NEON_HELPER_2DIFF(op, H, S, kInput32bits##set); } \
     TEST(op##_S) { CALL_TEST_NEON_HELPER_2DIFF(op, S, D, kInput64bits##set); }
   3347 
   // FP tests with a scalar destination and a two-lane vector source, defined in
   // the order S, D, H: S <- 2S (kInputFloat<set>), D <- 2D (kInputDouble<set>),
   // H <- 2H (kInputFloat16<set>). Note that, despite the "_SD" in the macro
   // name, an <op>_H test is defined as well.
   #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(op, set) \
     TEST(op##_S) { CALL_TEST_NEON_HELPER_2DIFF(op, S, 2S, kInputFloat##set); } \
     TEST(op##_D) { CALL_TEST_NEON_HELPER_2DIFF(op, D, 2D, kInputDouble##set); } \
     TEST(op##_H) { CALL_TEST_NEON_HELPER_2DIFF(op, H, 2H, kInputFloat16##set); }
   3358 
   // Two-source NEON call in which all three formats are the same and both
   // sources read the same array: forwards to CALL_TEST_NEON_HELPER_2Op with
   // `fmt` repeated three times and `src_nm` used for both input_n and input_m.
   // The expansion is a brace-enclosed block, so the `;` callers write after the
   // macro invocation becomes an empty statement.
   #define CALL_TEST_NEON_HELPER_3SAME(op, fmt, src_d, src_nm) \
     { CALL_TEST_NEON_HELPER_2Op(op, fmt, fmt, fmt, src_d, src_nm, src_nm); }
   3369 
   // Defines <op>_8B and <op>_16B. Both pass kInput8bitsAccDestination as the
   // destination input and kInput8bits<set> as the (shared) source input.
   #define DEFINE_TEST_NEON_3SAME_8B_16B(op, set) \
     TEST(op##_8B) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           op, 8B, kInput8bitsAccDestination, kInput8bits##set); \
     } \
     TEST(op##_16B) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           op, 16B, kInput8bitsAccDestination, kInput8bits##set); \
     }
   3383 
   // Defines <op>_4H, <op>_8H, <op>_2S and <op>_4S, in that order. The halfword
   // tests use kInput16bitsAccDestination / kInput16bits<set>; the word tests use
   // kInput32bitsAccDestination / kInput32bits<set>.
   #define DEFINE_TEST_NEON_3SAME_HS(op, set) \
     TEST(op##_4H) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           op, 4H, kInput16bitsAccDestination, kInput16bits##set); \
     } \
     TEST(op##_8H) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           op, 8H, kInput16bitsAccDestination, kInput16bits##set); \
     } \
     TEST(op##_2S) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           op, 2S, kInput32bitsAccDestination, kInput32bits##set); \
     } \
     TEST(op##_4S) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           op, 4S, kInput32bitsAccDestination, kInput32bits##set); \
     }
   3409 
   // Expands to DEFINE_TEST_NEON_3SAME_8B_16B followed by
   // DEFINE_TEST_NEON_3SAME_HS, both with the same `mnemonic` and `input`.
   #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
     DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
     DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
   3413 
   // Expands to everything DEFINE_TEST_NEON_3SAME_NO2D produces, then adds
   // TEST(mnemonic##_2D), which calls CALL_TEST_NEON_HELPER_3SAME with the 2D
   // arrangement, kInput64bitsAccDestination and kInput64bits##input.
   #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
     DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, 2D, kInput64bitsAccDestination, kInput64bits##input); \
     }
   3422 
   // Defines five TESTs for `mnemonic`: mnemonic##_4H, _8H, _2S, _4S and _2D.
   // Each forwards to CALL_TEST_NEON_HELPER_3SAME with the matching arrangement
   // and these input lists:
   //   4H / 8H : kInputFloat16AccDestination, kInputFloat16##input
   //   2S / 4S : kInputFloatAccDestination,   kInputFloat##input
   //   2D      : kInputDoubleAccDestination,  kInputDouble##input
   #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
     TEST(mnemonic##_4H) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, 4H, kInputFloat16AccDestination, kInputFloat16##input); \
     } \
     TEST(mnemonic##_8H) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, 8H, kInputFloat16AccDestination, kInputFloat16##input); \
     } \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, 2S, kInputFloatAccDestination, kInputFloat##input); \
     } \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, 4S, kInputFloatAccDestination, kInputFloat##input); \
     } \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, 2D, kInputDoubleAccDestination, kInputDouble##input); \
     }
   3454 
   // Defines the single TEST(mnemonic##_D), which calls
   // CALL_TEST_NEON_HELPER_3SAME with the D form, kInput64bitsAccDestination and
   // kInput64bits##input.
   #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
     }
   3462 
   // Defines TEST(mnemonic##_H) and TEST(mnemonic##_S). Both call
   // CALL_TEST_NEON_HELPER_3SAME: the H test with kInput16bitsAccDestination and
   // kInput16bits##input, the S test with kInput32bitsAccDestination and
   // kInput32bits##input.
   #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
     TEST(mnemonic##_H) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
     } \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
     }
   3476 
   // Defines four TESTs for `mnemonic`: mnemonic##_B, _H, _S and _D. Each
   // forwards to CALL_TEST_NEON_HELPER_3SAME with the matching form and the
   // 8-, 16-, 32- or 64-bit pair of lists (kInputNNbitsAccDestination and
   // kInputNNbits##input).
   #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
     TEST(mnemonic##_B) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, B, kInput8bitsAccDestination, kInput8bits##input); \
     } \
     TEST(mnemonic##_H) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
     } \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
     } \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
     }
   3502 
   // Defines three TESTs for `mnemonic`: mnemonic##_H, _S and _D. Each forwards
   // to CALL_TEST_NEON_HELPER_3SAME with these input lists:
   //   H : kInputFloat16AccDestination, kInputFloat16##input
   //   S : kInputFloatAccDestination,   kInputFloat##input
   //   D : kInputDoubleAccDestination,  kInputDouble##input
   #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
     TEST(mnemonic##_H) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, H, kInputFloat16AccDestination, kInputFloat16##input); \
     } \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, S, kInputFloatAccDestination, kInputFloat##input); \
     } \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_3SAME( \
           mnemonic, D, kInputDoubleAccDestination, kInputDouble##input); \
     }
   3522 
   // Thin wrapper for three-operand tests whose destination and source forms
   // may differ: passes all seven arguments, unchanged and in order, to
   // CALL_TEST_NEON_HELPER_2Op inside its own brace-delimited block.
   #define CALL_TEST_NEON_HELPER_3DIFF( \
       mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
     { \
       CALL_TEST_NEON_HELPER_2Op( \
           mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
     }
   3534 
   // Defines two TESTs with an 8H destination form:
   //   mnemonic##_8H  : `mnemonic`,    source forms 8B, 8B
   //   mnemonic##2_8H : `mnemonic##2`, source forms 16B, 16B
   // Both pass kInput16bitsAccDestination as input_d and kInput8bits##input as
   // input_n and input_m of CALL_TEST_NEON_HELPER_3DIFF.
   #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
     TEST(mnemonic##_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 8H, 8B, 8B, \
           kInput16bitsAccDestination, kInput8bits##input, kInput8bits##input); \
     } \
     TEST(mnemonic##2_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 8H, 16B, 16B, \
           kInput16bitsAccDestination, kInput8bits##input, kInput8bits##input); \
     }
   3554 
   // Defines two TESTs with a 4S destination form:
   //   mnemonic##_4S  : `mnemonic`,    source forms 4H, 4H
   //   mnemonic##2_4S : `mnemonic##2`, source forms 8H, 8H
   // Both pass kInput32bitsAccDestination as input_d and kInput16bits##input as
   // input_n and input_m of CALL_TEST_NEON_HELPER_3DIFF.
   #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 4S, 4H, 4H, \
           kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 4S, 8H, 8H, \
           kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
     }
   3574 
   // Defines two TESTs with a 2D destination form:
   //   mnemonic##_2D  : `mnemonic`,    source forms 2S, 2S
   //   mnemonic##2_2D : `mnemonic##2`, source forms 4S, 4S
   // Both pass kInput64bitsAccDestination as input_d and kInput32bits##input as
   // input_n and input_m of CALL_TEST_NEON_HELPER_3DIFF.
   #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 2D, 2S, 2S, \
           kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
     } \
     TEST(mnemonic##2_2D) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 2D, 4S, 4S, \
           kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
     }
   3594 
   // Expands to DEFINE_TEST_NEON_3DIFF_LONG_4S followed by
   // DEFINE_TEST_NEON_3DIFF_LONG_2D for the same `mnemonic` and `input`; no 8H
   // tests are generated.
   #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
   3598 
   // Generates the full set of long-form tests: the 8H pair from
   // DEFINE_TEST_NEON_3DIFF_LONG_8H, then the 4S and 2D pairs via
   // DEFINE_TEST_NEON_3DIFF_LONG_SD (which expands to the _4S and _2D macros in
   // that order).
   #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)
   3603 
   // Defines TEST(mnemonic##_S): an S destination form with H, H source forms,
   // using kInput32bitsAccDestination as input_d and kInput16bits##input as both
   // input_n and input_m of CALL_TEST_NEON_HELPER_3DIFF.
   #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
     TEST(mnemonic##_S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, S, H, H, \
           kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
     }
   3614 
   // Defines TEST(mnemonic##_D): a D destination form with S, S source forms,
   // using kInput64bitsAccDestination as input_d and kInput32bits##input as both
   // input_n and input_m of CALL_TEST_NEON_HELPER_3DIFF.
   #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
     TEST(mnemonic##_D) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, D, S, S, \
           kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
     }
   3625 
   // Expands to DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S followed by
   // DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D for the same `mnemonic` and `input`.
   #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
     DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
   3629 
   // Defines six TESTs in which the destination and first source share a form
   // and the second source uses a different one (vdform, vnform, vmform):
   //   mnemonic##_8H  : 8H, 8H, 8B      mnemonic##2_8H : 8H, 8H, 16B
   //   mnemonic##_4S  : 4S, 4S, 4H      mnemonic##2_4S : 4S, 4S, 8H
   //   mnemonic##_2D  : 2D, 2D, 2S      mnemonic##2_2D : 2D, 2D, 4S
   // The `2` tests invoke `mnemonic##2`. input_d and input_n use the 16-, 32- or
   // 64-bit lists matching the destination; input_m uses the 8-, 16- or 32-bit
   // list matching the second source.
   #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
     TEST(mnemonic##_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 8H, 8H, 8B, \
           kInput16bitsAccDestination, kInput16bits##input, kInput8bits##input); \
     } \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 4S, 4S, 4H, \
           kInput32bitsAccDestination, kInput32bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##_2D) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 2D, 2D, 2S, \
           kInput64bitsAccDestination, kInput64bits##input, kInput32bits##input); \
     } \
     TEST(mnemonic##2_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 8H, 8H, 16B, \
           kInput16bitsAccDestination, kInput16bits##input, kInput8bits##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 4S, 4S, 8H, \
           kInput32bitsAccDestination, kInput32bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##2_2D) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 2D, 2D, 4S, \
           kInput64bitsAccDestination, kInput64bits##input, kInput32bits##input); \
     }
   3685 
   // Defines six TESTs in which both sources share one form and the destination
   // uses a different one (vdform, vnform, vmform):
   //   mnemonic##_8B : 8B, 8H, 8H      mnemonic##2_16B : 16B, 8H, 8H
   //   mnemonic##_4H : 4H, 4S, 4S      mnemonic##2_8H  : 8H,  4S, 4S
   //   mnemonic##_2S : 2S, 2D, 2D      mnemonic##2_4S  : 4S,  2D, 2D
   // The `2` tests invoke `mnemonic##2`. input_d uses the 8-, 16- or 32-bit
   // AccDestination list; input_n and input_m both use the 16-, 32- or 64-bit
   // kInputNNbits##input list.
   #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
     TEST(mnemonic##_8B) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 8B, 8H, 8H, \
           kInput8bitsAccDestination, kInput16bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##_4H) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 4H, 4S, 4S, \
           kInput16bitsAccDestination, kInput32bits##input, kInput32bits##input); \
     } \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 2S, 2D, 2D, \
           kInput32bitsAccDestination, kInput64bits##input, kInput64bits##input); \
     } \
     TEST(mnemonic##2_16B) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 16B, 8H, 8H, \
           kInput8bitsAccDestination, kInput16bits##input, kInput16bits##input); \
     } \
     TEST(mnemonic##2_8H) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 8H, 4S, 4S, \
           kInput16bitsAccDestination, kInput32bits##input, kInput32bits##input); \
     } \
     TEST(mnemonic##2_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic##2, 4S, 2D, 2D, \
           kInput32bitsAccDestination, kInput64bits##input, kInput64bits##input); \
     }
   3741 
   // Defines two TESTs with S-sized destination forms and B-sized source forms:
   //   mnemonic##_2S : 2S, 8B, 8B
   //   mnemonic##_4S : 4S, 16B, 16B
   // Both pass kInput32bitsAccDestination as input_d and kInput8bits##input as
   // input_n and input_m of CALL_TEST_NEON_HELPER_3DIFF.
   #define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \
     TEST(mnemonic##_2S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 2S, 8B, 8B, \
           kInput32bitsAccDestination, kInput8bits##input, kInput8bits##input); \
     } \
     TEST(mnemonic##_4S) { \
       CALL_TEST_NEON_HELPER_3DIFF( \
           mnemonic, 4S, 16B, 16B, \
           kInput32bitsAccDestination, kInput8bits##input, kInput8bits##input); \
     }
   3761 
   3762 
   // Thin wrapper for register-plus-immediate tests: passes all five arguments,
   // unchanged and in order, to CALL_TEST_NEON_HELPER_2OpImm inside its own
   // brace-delimited block.
   #define CALL_TEST_NEON_HELPER_2OPIMM( \
       mnemonic, vdform, vnform, input_n, input_imm) \
     { \
       CALL_TEST_NEON_HELPER_2OpImm( \
           mnemonic, vdform, vnform, input_n, input_imm); \
     }
   3772 
   // Defines seven TESTs named mnemonic##_FORM_2OPIMM for FORM in 8B, 16B, 4H,
   // 8H, 2S, 4S and 2D. Destination and source use the same form. input_n is
   // kInputNNbits##input and input_imm is kInputNNbitsImm##input_imm, where NN
   // is 8 for the B forms, 16 for H, 32 for S and 64 for D.
   #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
     TEST(mnemonic##_8B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8B, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_16B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 16B, 16B, kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4H, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_8H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8H, 8H, kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
     }
   3823 
   // Defines seven TESTs named mnemonic##_FORM_2OPIMM for FORM in 8B, 16B, 4H,
   // 8H, 2S, 4S and 2D. The destination uses the vector form while the source
   // uses the scalar form of the same lane size (B, H, S or D). input_n is
   // kInputNNbits##input and input_imm is kInputNNbitsImm##input_imm, with NN
   // matching the lane size (8, 16, 32 or 64).
   #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
     TEST(mnemonic##_8B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_16B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 16B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_8H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2D_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
     }
   3874 
   // Defines six immediate-operand TESTs (vdform, vnform):
   //   mnemonic##_8B_2OPIMM : 8B, 8H      mnemonic##2_16B_2OPIMM : 16B, 8H
   //   mnemonic##_4H_2OPIMM : 4H, 4S      mnemonic##2_8H_2OPIMM  : 8H,  4S
   //   mnemonic##_2S_2OPIMM : 2S, 2D      mnemonic##2_4S_2OPIMM  : 4S,  2D
   // The `2` tests invoke `mnemonic##2`. input_n is the 16-, 32- or 64-bit
   // kInputNNbits##input list matching the source form; input_imm is the 8-,
   // 16- or 32-bit kInputNNbitsImm##input_imm list matching the destination.
   #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
     TEST(mnemonic##_8B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 8B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##_4H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 4H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##_2S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic, 2S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_16B_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 16B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_8H_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 8H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
     } \
     TEST(mnemonic##2_4S_2OPIMM) { \
       CALL_TEST_NEON_HELPER_2OPIMM( \
           mnemonic##2, 4S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
     }
   3918 
   3919 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
   3920   TEST(mnemonic##_B_2OPIMM) {                                             \
   3921     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
   3922                                  B,                                       \
   3923                                  H,                                       \
   3924                                  kInput16bits##input,                     \
   3925                                  kInput8bitsImm##input_imm);              \
   3926   }                                                                       \
   3927   TEST(mnemonic##_H_2OPIMM) {                                             \
   3928     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
   3929                                  H,                                       \
   3930                                  S,                                       \
   3931                                  kInput32bits##input,                     \
   3932                                  kInput16bitsImm##input_imm);             \
   3933   }                                                                       \
   3934   TEST(mnemonic##_S_2OPIMM) {                                             \
   3935     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
   3936                                  S,                                       \
   3937                                  D,                                       \
   3938                                  kInput64bits##input,                     \
   3939                                  kInput32bitsImm##input_imm);             \
   3940   }
   3941 
   3942 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
   3943   TEST(mnemonic##_4H_2OPIMM) {                                        \
   3944     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3945                                  4H,                                  \
   3946                                  4H,                                  \
   3947                                  kInputFloat16##input,                \
   3948                                  kInputDoubleImm##input_imm);         \
   3949   }                                                                   \
   3950   TEST(mnemonic##_8H_2OPIMM) {                                        \
   3951     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3952                                  8H,                                  \
   3953                                  8H,                                  \
   3954                                  kInputFloat16##input,                \
   3955                                  kInputDoubleImm##input_imm);         \
   3956   }                                                                   \
   3957   TEST(mnemonic##_2S_2OPIMM) {                                        \
   3958     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3959                                  2S,                                  \
   3960                                  2S,                                  \
   3961                                  kInputFloat##Basic,                  \
   3962                                  kInputDoubleImm##input_imm)          \
   3963   }                                                                   \
   3964   TEST(mnemonic##_4S_2OPIMM) {                                        \
   3965     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3966                                  4S,                                  \
   3967                                  4S,                                  \
   3968                                  kInputFloat##input,                  \
   3969                                  kInputDoubleImm##input_imm);         \
   3970   }                                                                   \
   3971   TEST(mnemonic##_2D_2OPIMM) {                                        \
   3972     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3973                                  2D,                                  \
   3974                                  2D,                                  \
   3975                                  kInputDouble##input,                 \
   3976                                  kInputDoubleImm##input_imm);         \
   3977   }
   3978 
   3979 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
   3980   TEST(mnemonic##_4H_2OPIMM) {                                 \
   3981     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3982                                  4H,                           \
   3983                                  4H,                           \
   3984                                  kInputFloat16##input,         \
   3985                                  kInput16bitsImm##input_imm)   \
   3986   }                                                            \
   3987   TEST(mnemonic##_8H_2OPIMM) {                                 \
   3988     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3989                                  8H,                           \
   3990                                  8H,                           \
   3991                                  kInputFloat16##input,         \
   3992                                  kInput16bitsImm##input_imm)   \
   3993   }                                                            \
   3994   TEST(mnemonic##_2S_2OPIMM) {                                 \
   3995     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3996                                  2S,                           \
   3997                                  2S,                           \
   3998                                  kInputFloat##Basic,           \
   3999                                  kInput32bitsImm##input_imm)   \
   4000   }                                                            \
   4001   TEST(mnemonic##_4S_2OPIMM) {                                 \
   4002     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   4003                                  4S,                           \
   4004                                  4S,                           \
   4005                                  kInputFloat##input,           \
   4006                                  kInput32bitsImm##input_imm)   \
   4007   }                                                            \
   4008   TEST(mnemonic##_2D_2OPIMM) {                                 \
   4009     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   4010                                  2D,                           \
   4011                                  2D,                           \
   4012                                  kInputDouble##input,          \
   4013                                  kInput64bitsImm##input_imm)   \
   4014   }
   4015 
   4016 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
   4017   TEST(mnemonic##_H_2OPIMM) {                                         \
   4018     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   4019                                  H,                                   \
   4020                                  H,                                   \
   4021                                  kInputFloat16##Basic,                \
   4022                                  kInput16bitsImm##input_imm)          \
   4023   }                                                                   \
   4024   TEST(mnemonic##_S_2OPIMM) {                                         \
   4025     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   4026                                  S,                                   \
   4027                                  S,                                   \
   4028                                  kInputFloat##Basic,                  \
   4029                                  kInput32bitsImm##input_imm)          \
   4030   }                                                                   \
   4031   TEST(mnemonic##_D_2OPIMM) {                                         \
   4032     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   4033                                  D,                                   \
   4034                                  D,                                   \
   4035                                  kInputDouble##input,                 \
   4036                                  kInput64bitsImm##input_imm)          \
   4037   }
   4038 
   4039 #define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \
   4040   TEST(mnemonic##_4H_2OPIMM) {                                  \
   4041     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
   4042                                  4H,                            \
   4043                                  4H,                            \
   4044                                  kInput16bits##input,           \
   4045                                  kInput16bitsImm##input_imm);   \
   4046   }                                                             \
   4047   TEST(mnemonic##_8H_2OPIMM) {                                  \
   4048     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
   4049                                  8H,                            \
   4050                                  8H,                            \
   4051                                  kInput16bits##input,           \
   4052                                  kInput16bitsImm##input_imm);   \
   4053   }                                                             \
   4054   TEST(mnemonic##_2S_2OPIMM) {                                  \
   4055     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
   4056                                  2S,                            \
   4057                                  2S,                            \
   4058                                  kInput32bits##input,           \
   4059                                  kInput32bitsImm##input_imm);   \
   4060   }                                                             \
   4061   TEST(mnemonic##_4S_2OPIMM) {                                  \
   4062     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
   4063                                  4S,                            \
   4064                                  4S,                            \
   4065                                  kInput32bits##input,           \
   4066                                  kInput32bitsImm##input_imm);   \
   4067   }                                                             \
   4068   TEST(mnemonic##_2D_2OPIMM) {                                  \
   4069     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
   4070                                  2D,                            \
   4071                                  2D,                            \
   4072                                  kInput64bits##input,           \
   4073                                  kInput64bitsImm##input_imm);   \
   4074   }
   4075 
   4076 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
   4077   TEST(mnemonic##_D_2OPIMM) {                                        \
   4078     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                           \
   4079                                  D,                                  \
   4080                                  D,                                  \
   4081                                  kInput64bits##input,                \
   4082                                  kInput64bitsImm##input_imm);        \
   4083   }
   4084 
   4085 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
   4086   TEST(mnemonic##_H_2OPIMM) {                                          \
   4087     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
   4088                                  H,                                    \
   4089                                  H,                                    \
   4090                                  kInput16bits##input,                  \
   4091                                  kInput16bitsImm##input_imm);          \
   4092   }                                                                    \
   4093   TEST(mnemonic##_S_2OPIMM) {                                          \
   4094     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
   4095                                  S,                                    \
   4096                                  S,                                    \
   4097                                  kInput32bits##input,                  \
   4098                                  kInput32bitsImm##input_imm);          \
   4099   }                                                                    \
   4100   DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
   4101 
   4102 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
   4103   TEST(mnemonic##_D_2OPIMM) {                                           \
   4104     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                              \
   4105                                  D,                                     \
   4106                                  D,                                     \
   4107                                  kInputDouble##input,                   \
   4108                                  kInputDoubleImm##input_imm);           \
   4109   }
   4110 
   4111 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
   4112   TEST(mnemonic##_H_2OPIMM) {                                             \
   4113     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
   4114                                  H,                                       \
   4115                                  H,                                       \
   4116                                  kInputFloat16##input,                    \
   4117                                  kInputDoubleImm##input_imm);             \
   4118   }                                                                       \
   4119   TEST(mnemonic##_S_2OPIMM) {                                             \
   4120     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
   4121                                  S,                                       \
   4122                                  S,                                       \
   4123                                  kInputFloat##input,                      \
   4124                                  kInputDoubleImm##input_imm);             \
   4125   }                                                                       \
   4126   DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
   4127 
   4128 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
   4129   TEST(mnemonic##_B_2OPIMM) {                                      \
   4130     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
   4131                                  B,                                \
   4132                                  B,                                \
   4133                                  kInput8bits##input,               \
   4134                                  kInput8bitsImm##input_imm);       \
   4135   }                                                                \
   4136   DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)
   4137 
   4138 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
   4139   TEST(mnemonic##_8H_2OPIMM) {                                   \
   4140     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   4141                                  8H,                             \
   4142                                  8B,                             \
   4143                                  kInput8bits##input,             \
   4144                                  kInput8bitsImm##input_imm);     \
   4145   }                                                              \
   4146   TEST(mnemonic##_4S_2OPIMM) {                                   \
   4147     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   4148                                  4S,                             \
   4149                                  4H,                             \
   4150                                  kInput16bits##input,            \
   4151                                  kInput16bitsImm##input_imm);    \
   4152   }                                                              \
   4153   TEST(mnemonic##_2D_2OPIMM) {                                   \
   4154     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   4155                                  2D,                             \
   4156                                  2S,                             \
   4157                                  kInput32bits##input,            \
   4158                                  kInput32bitsImm##input_imm);    \
   4159   }                                                              \
   4160   TEST(mnemonic##2_8H_2OPIMM) {                                  \
   4161     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
   4162                                  8H,                             \
   4163                                  16B,                            \
   4164                                  kInput8bits##input,             \
   4165                                  kInput8bitsImm##input_imm);     \
   4166   }                                                              \
   4167   TEST(mnemonic##2_4S_2OPIMM) {                                  \
   4168     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
   4169                                  4S,                             \
   4170                                  8H,                             \
   4171                                  kInput16bits##input,            \
   4172                                  kInput16bitsImm##input_imm);    \
   4173   }                                                              \
   4174   TEST(mnemonic##2_2D_2OPIMM) {                                  \
   4175     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
   4176                                  2D,                             \
   4177                                  4S,                             \
   4178                                  kInput32bits##input,            \
   4179                                  kInput32bitsImm##input_imm);    \
   4180   }
   4181 
   4182 #define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,           \
   4183                                                     vdform,             \
   4184                                                     vnform,             \
   4185                                                     vmform,             \
   4186                                                     input_d,            \
   4187                                                     input_n,            \
   4188                                                     input_m,            \
   4189                                                     indices,            \
   4190                                                     vm_subvector_count) \
   4191   {                                                                     \
   4192     CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,               \
   4193                                                 vdform,                 \
   4194                                                 vnform,                 \
   4195                                                 vmform,                 \
   4196                                                 input_d,                \
   4197                                                 input_n,                \
   4198                                                 input_m,                \
   4199                                                 indices,                \
   4200                                                 vm_subvector_count);    \
   4201   }
   4202 
   4203 #define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(                        \
   4204     mnemonic, input_d, input_n, input_m)                               \
   4205   TEST(mnemonic##_2S_8B_B) {                                           \
   4206     CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
   4207                                                 2S,                    \
   4208                                                 8B,                    \
   4209                                                 B,                     \
   4210                                                 kInput32bits##input_d, \
   4211                                                 kInput8bits##input_n,  \
   4212                                                 kInput8bits##input_m,  \
   4213                                                 kInputSIndices,        \
   4214                                                 4);                    \
   4215   }                                                                    \
   4216   TEST(mnemonic##_4S_16B_B) {                                          \
   4217     CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
   4218                                                 4S,                    \
   4219                                                 16B,                   \
   4220                                                 B,                     \
   4221                                                 kInput32bits##input_d, \
   4222                                                 kInput8bits##input_n,  \
   4223                                                 kInput8bits##input_m,  \
   4224                                                 kInputSIndices,        \
   4225                                                 4);                    \
   4226   }
   4227 
   4228 #define CALL_TEST_NEON_HELPER_BYELEMENT(                                  \
   4229     mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
   4230   {                                                                       \
   4231     CALL_TEST_NEON_HELPER_ByElement(mnemonic,                             \
   4232                                     vdform,                               \
   4233                                     vnform,                               \
   4234                                     vmform,                               \
   4235                                     input_d,                              \
   4236                                     input_n,                              \
   4237                                     input_m,                              \
   4238                                     indices);                             \
   4239   }
   4240 
   4241 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
   4242   TEST(mnemonic##_4H_4H_H) {                                            \
   4243     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
   4244                                     4H,                                 \
   4245                                     4H,                                 \
   4246                                     H,                                  \
   4247                                     kInput16bits##input_d,              \
   4248                                     kInput16bits##input_n,              \
   4249                                     kInput16bits##input_m,              \
   4250                                     kInputHIndices);                    \
   4251   }                                                                     \
   4252   TEST(mnemonic##_8H_8H_H) {                                            \
   4253     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
   4254                                     8H,                                 \
   4255                                     8H,                                 \
   4256                                     H,                                  \
   4257                                     kInput16bits##input_d,              \
   4258                                     kInput16bits##input_n,              \
   4259                                     kInput16bits##input_m,              \
   4260                                     kInputHIndices);                    \
   4261   }                                                                     \
   4262   TEST(mnemonic##_2S_2S_S) {                                            \
   4263     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
   4264                                     2S,                                 \
   4265                                     2S,                                 \
   4266                                     S,                                  \
   4267                                     kInput32bits##input_d,              \
   4268                                     kInput32bits##input_n,              \
   4269                                     kInput32bits##input_m,              \
   4270                                     kInputSIndices);                    \
   4271   }                                                                     \
   4272   TEST(mnemonic##_4S_4S_S) {                                            \
   4273     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
   4274                                     4S,                                 \
   4275                                     4S,                                 \
   4276                                     S,                                  \
   4277                                     kInput32bits##input_d,              \
   4278                                     kInput32bits##input_n,              \
   4279                                     kInput32bits##input_m,              \
   4280                                     kInputSIndices);                    \
   4281   }
   4282 
   4283 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
   4284   TEST(mnemonic##_H_H_H) {                                                     \
   4285     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
   4286                                     H,                                         \
   4287                                     H,                                         \
   4288                                     H,                                         \
   4289                                     kInput16bits##input_d,                     \
   4290                                     kInput16bits##input_n,                     \
   4291                                     kInput16bits##input_m,                     \
   4292                                     kInputHIndices);                           \
   4293   }                                                                            \
   4294   TEST(mnemonic##_S_S_S) {                                                     \
   4295     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
   4296                                     S,                                         \
   4297                                     S,                                         \
   4298                                     S,                                         \
   4299                                     kInput32bits##input_d,                     \
   4300                                     kInput32bits##input_n,                     \
   4301                                     kInput32bits##input_m,                     \
   4302                                     kInputSIndices);                           \
   4303   }
   4304 
   // Defines five by-element floating-point TESTs for `mnemonic`, named
   // <mnemonic>_4H_4H_H, <mnemonic>_8H_8H_H, <mnemonic>_2S_2S_S,
   // <mnemonic>_4S_4S_S and <mnemonic>_2D_2D_D. Each TEST forwards to
   // CALL_TEST_NEON_HELPER_BYELEMENT with the three format tokens spelled in
   // its name.
   //
   // `input_d`, `input_n` and `input_m` are suffixes that are token-pasted onto
   // kInputFloat16 (H forms), kInputFloat (S forms) or kInputDouble (D form) to
   // name the input arrays. The final argument is kInputHIndices,
   // kInputSIndices or kInputDIndices, matching the element size of the form.
   4305 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
   4306   TEST(mnemonic##_4H_4H_H) {                                               \
   4307     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   4308                                     4H,                                    \
   4309                                     4H,                                    \
   4310                                     H,                                     \
   4311                                     kInputFloat16##input_d,                \
   4312                                     kInputFloat16##input_n,                \
   4313                                     kInputFloat16##input_m,                \
   4314                                     kInputHIndices);                       \
   4315   }                                                                        \
   4316   TEST(mnemonic##_8H_8H_H) {                                               \
   4317     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   4318                                     8H,                                    \
   4319                                     8H,                                    \
   4320                                     H,                                     \
   4321                                     kInputFloat16##input_d,                \
   4322                                     kInputFloat16##input_n,                \
   4323                                     kInputFloat16##input_m,                \
   4324                                     kInputHIndices);                       \
   4325   }                                                                        \
   4326   TEST(mnemonic##_2S_2S_S) {                                               \
   4327     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   4328                                     2S,                                    \
   4329                                     2S,                                    \
   4330                                     S,                                     \
   4331                                     kInputFloat##input_d,                  \
   4332                                     kInputFloat##input_n,                  \
   4333                                     kInputFloat##input_m,                  \
   4334                                     kInputSIndices);                       \
   4335   }                                                                        \
   4336   TEST(mnemonic##_4S_4S_S) {                                               \
   4337     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   4338                                     4S,                                    \
   4339                                     4S,                                    \
   4340                                     S,                                     \
   4341                                     kInputFloat##input_d,                  \
   4342                                     kInputFloat##input_n,                  \
   4343                                     kInputFloat##input_m,                  \
   4344                                     kInputSIndices);                       \
   4345   }                                                                        \
   4346   TEST(mnemonic##_2D_2D_D) {                                               \
   4347     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   4348                                     2D,                                    \
   4349                                     2D,                                    \
   4350                                     D,                                     \
   4351                                     kInputDouble##input_d,                 \
   4352                                     kInputDouble##input_n,                 \
   4353                                     kInputDouble##input_m,                 \
   4354                                     kInputDIndices);                       \
   4355   }
   4356 
   // Scalar counterpart of the by-element floating-point test generator.
   // Defines three TESTs named <mnemonic>_H_H_H, <mnemonic>_S_S_S and
   // <mnemonic>_D_D_D, each forwarding to CALL_TEST_NEON_HELPER_BYELEMENT with
   // the same format token (H, S or D) in all three format positions.
   //
   // `inp_d`, `inp_n` and `inp_m` are suffixes token-pasted onto kInputFloat16,
   // kInputFloat or kInputDouble to name the input arrays; the final argument
   // is kInputHIndices, kInputSIndices or kInputDIndices respectively.
   4357 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
   4358   TEST(mnemonic##_H_H_H) {                                                  \
   4359     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
   4360                                     H,                                      \
   4361                                     H,                                      \
   4362                                     H,                                      \
   4363                                     kInputFloat16##inp_d,                   \
   4364                                     kInputFloat16##inp_n,                   \
   4365                                     kInputFloat16##inp_m,                   \
   4366                                     kInputHIndices);                        \
   4367   }                                                                         \
   4368   TEST(mnemonic##_S_S_S) {                                                  \
   4369     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
   4370                                     S,                                      \
   4371                                     S,                                      \
   4372                                     S,                                      \
   4373                                     kInputFloat##inp_d,                     \
   4374                                     kInputFloat##inp_n,                     \
   4375                                     kInputFloat##inp_m,                     \
   4376                                     kInputSIndices);                        \
   4377   }                                                                         \
   4378   TEST(mnemonic##_D_D_D) {                                                  \
   4379     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
   4380                                     D,                                      \
   4381                                     D,                                      \
   4382                                     D,                                      \
   4383                                     kInputDouble##inp_d,                    \
   4384                                     kInputDouble##inp_n,                    \
   4385                                     kInputDouble##inp_m,                    \
   4386                                     kInputDIndices);                        \
   4387   }
   4388 
   4389 
   // Defines four by-element TESTs whose first format is wider than the other
   // two: <mnemonic>_4S_4H_H, <mnemonic>2_4S_8H_H, <mnemonic>_2D_2S_S and
   // <mnemonic>2_2D_4S_S. The two "2" tests paste a literal `2` onto the
   // mnemonic (so `smlal` becomes `smlal2`) and use the 8H / 4S second format
   // (presumably the second-half forms of the instruction - confirm against
   // the assembler).
   //
   // `input_d` is pasted onto the array prefix one size up (kInput32bits or
   // kInput64bits) from the prefix used for `input_n` and `input_m`
   // (kInput16bits or kInput32bits). The final argument is kInputHIndices for
   // the H-element tests and kInputSIndices for the S-element tests.
   4390 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
   4391   TEST(mnemonic##_4S_4H_H) {                                                 \
   4392     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
   4393                                     4S,                                      \
   4394                                     4H,                                      \
   4395                                     H,                                       \
   4396                                     kInput32bits##input_d,                   \
   4397                                     kInput16bits##input_n,                   \
   4398                                     kInput16bits##input_m,                   \
   4399                                     kInputHIndices);                         \
   4400   }                                                                          \
   4401   TEST(mnemonic##2_4S_8H_H) {                                                \
   4402     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
   4403                                     4S,                                      \
   4404                                     8H,                                      \
   4405                                     H,                                       \
   4406                                     kInput32bits##input_d,                   \
   4407                                     kInput16bits##input_n,                   \
   4408                                     kInput16bits##input_m,                   \
   4409                                     kInputHIndices);                         \
   4410   }                                                                          \
   4411   TEST(mnemonic##_2D_2S_S) {                                                 \
   4412     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
   4413                                     2D,                                      \
   4414                                     2S,                                      \
   4415                                     S,                                       \
   4416                                     kInput64bits##input_d,                   \
   4417                                     kInput32bits##input_n,                   \
   4418                                     kInput32bits##input_m,                   \
   4419                                     kInputSIndices);                         \
   4420   }                                                                          \
   4421   TEST(mnemonic##2_2D_4S_S) {                                                \
   4422     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
   4423                                     2D,                                      \
   4424                                     4S,                                      \
   4425                                     S,                                       \
   4426                                     kInput64bits##input_d,                   \
   4427                                     kInput32bits##input_n,                   \
   4428                                     kInput32bits##input_m,                   \
   4429                                     kInputSIndices);                         \
   4430   }
   4431 
   // Scalar counterpart of the differing-width by-element test generator.
   // Defines two TESTs, <mnemonic>_S_H_H and <mnemonic>_D_S_S, each forwarding
   // to CALL_TEST_NEON_HELPER_BYELEMENT.
   //
   // `input_d` is pasted onto kInput32bits / kInput64bits, while `input_n` and
   // `input_m` are pasted onto kInput16bits / kInput32bits. The final argument
   // is kInputHIndices for the H test and kInputSIndices for the S test.
   4432 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(            \
   4433     mnemonic, input_d, input_n, input_m)                   \
   4434   TEST(mnemonic##_S_H_H) {                                 \
   4435     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
   4436                                     S,                     \
   4437                                     H,                     \
   4438                                     H,                     \
   4439                                     kInput32bits##input_d, \
   4440                                     kInput16bits##input_n, \
   4441                                     kInput16bits##input_m, \
   4442                                     kInputSIndices);       \
   4443   }                                                        \
   4444   TEST(mnemonic##_D_S_S) {                                 \
   4445     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
   4446                                     D,                     \
   4447                                     S,                     \
   4448                                     S,                     \
   4449                                     kInput64bits##input_d, \
   4450                                     kInput32bits##input_n, \
   4451                                     kInput32bits##input_m, \
   4452                                     kInputSIndices);       \
   4453   }
   4454 
   4455 
   // Convenience wrapper around CALL_TEST_NEON_HELPER_OpImmOpImm for the case
   // where both register operands share one format. It passes
   // &MacroAssembler::mnemonic as the helper, `mnemonic` again as a plain
   // token, `variant` twice (once per operand), and then the two
   // (input array, immediate array) pairs in the order
   // input_d, input_imm1, input_n, input_imm2.
   4456 #define CALL_TEST_NEON_HELPER_2OP2IMM(                           \
   4457     mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
   4458   {                                                              \
   4459     CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic,  \
   4460                                      mnemonic,                   \
   4461                                      variant,                    \
   4462                                      variant,                    \
   4463                                      input_d,                    \
   4464                                      input_imm1,                 \
   4465                                      input_n,                    \
   4466                                      input_imm2);                \
   4467   }
   4468 
   // Defines four TESTs named <mnemonic>_B, <mnemonic>_H, <mnemonic>_S and
   // <mnemonic>_D, using the 16B, 8H, 4S and 2D variants respectively. Each
   // forwards to CALL_TEST_NEON_HELPER_2OP2IMM.
   //
   // `input_d` and `input_n` are suffixes pasted onto kInput{8,16,32,64}bits;
   // `input_imm1` and `input_imm2` are suffixes pasted onto
   // kInput{8,16,32,64}bitsImm. The bit width always matches the element size
   // of the TEST being defined.
   4469 #define DEFINE_TEST_NEON_2OP2IMM(                               \
   4470     mnemonic, input_d, input_imm1, input_n, input_imm2)         \
   4471   TEST(mnemonic##_B) {                                          \
   4472     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
   4473                                   16B,                          \
   4474                                   kInput8bits##input_d,         \
   4475                                   kInput8bitsImm##input_imm1,   \
   4476                                   kInput8bits##input_n,         \
   4477                                   kInput8bitsImm##input_imm2);  \
   4478   }                                                             \
   4479   TEST(mnemonic##_H) {                                          \
   4480     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
   4481                                   8H,                           \
   4482                                   kInput16bits##input_d,        \
   4483                                   kInput16bitsImm##input_imm1,  \
   4484                                   kInput16bits##input_n,        \
   4485                                   kInput16bitsImm##input_imm2); \
   4486   }                                                             \
   4487   TEST(mnemonic##_S) {                                          \
   4488     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
   4489                                   4S,                           \
   4490                                   kInput32bits##input_d,        \
   4491                                   kInput32bitsImm##input_imm1,  \
   4492                                   kInput32bits##input_n,        \
   4493                                   kInput32bitsImm##input_imm2); \
   4494   }                                                             \
   4495   TEST(mnemonic##_D) {                                          \
   4496     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
   4497                                   2D,                           \
   4498                                   kInput64bits##input_d,        \
   4499                                   kInput64bitsImm##input_imm1,  \
   4500                                   kInput64bits##input_n,        \
   4501                                   kInput64bitsImm##input_imm2); \
   4502   }
   4503 
   4504 
   4505 // Advanced SIMD copy.
        // `ins` takes two (register, lane) pairs, so it is given the Basic input
        // suffix and the LaneCountFromZero immediate suffix for both positions.
        // `dup` passes the same Basic / LaneCountFromZero suffixes.
   4506 DEFINE_TEST_NEON_2OP2IMM(
   4507     ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
   4508 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
   4509 
   4510 
   4511 // Advanced SIMD scalar copy.
        // Scalar `dup` is passed the same Basic / LaneCountFromZero suffixes as the
        // vector `dup` test, but through the scalar test-generating macro.
   4512 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
   4513 
   4514 
   4515 // Advanced SIMD three same.
        // One line per instruction; every entry in this group passes the `Basic`
        // input suffix. The DEFINE_TEST_NEON_3SAME* macros are defined elsewhere
        // (not in this section); their name suffix (_NO2D, _HS, _FP, _8B_16B)
        // presumably restricts which formats get a TEST - confirm against the
        // macro definitions. Note that udot and sdot are listed in this group
        // but go through DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE.
   4516 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
   4517 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
   4518 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
   4519 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
   4520 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
   4521 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
   4522 DEFINE_TEST_NEON_3SAME(cmge, Basic)
   4523 DEFINE_TEST_NEON_3SAME(sshl, Basic)
   4524 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
   4525 DEFINE_TEST_NEON_3SAME(srshl, Basic)
   4526 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
   4527 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
   4528 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
   4529 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
   4530 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
   4531 DEFINE_TEST_NEON_3SAME(add, Basic)
   4532 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
   4533 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
   4534 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
   4535 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
   4536 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
   4537 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
   4538 DEFINE_TEST_NEON_3SAME(addp, Basic)
   4539 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
   4540 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
   4541 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
   4542 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
   4543 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
   4544 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
   4545 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
   4546 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
   4547 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
   4548 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
   4549 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
   4550 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
   4551 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
   4552 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
   4553 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
   4554 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
   4555 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
   4556 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
   4557 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
   4558 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
   4559 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
   4560 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
   4561 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
   4562 DEFINE_TEST_NEON_3SAME(ushl, Basic)
   4563 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
   4564 DEFINE_TEST_NEON_3SAME(urshl, Basic)
   4565 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
   4566 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
   4567 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
   4568 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
   4569 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
   4570 DEFINE_TEST_NEON_3SAME(sub, Basic)
   4571 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
   4572 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
   4573 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
   4574 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
   4575 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
   4576 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
   4577 DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
   4578 DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
   4579 DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
   4580 DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
   4581 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
   4582 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
   4583 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
   4584 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
   4585 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
   4586 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
   4587 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
   4588 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
   4589 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
   4590 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
   4591 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
   4592 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
   4593 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
   4594 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
   4595 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
   4596 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
   4597 
   4598 
   4599 // Advanced SIMD scalar three same.
        // Scalar forms of the three-same instructions; every entry passes the
        // `Basic` input suffix. The _SCALAR, _SCALAR_D, _SCALAR_HS and _FP_SCALAR
        // macro variants are defined elsewhere (not in this section).
   4600 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
   4601 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
   4602 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
   4603 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
   4604 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
   4605 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
   4606 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
   4607 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
   4608 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
   4609 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
   4610 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
   4611 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
   4612 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
   4613 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
   4614 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
   4615 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
   4616 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
   4617 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
   4618 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
   4619 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
   4620 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
   4621 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
   4622 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
   4623 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
   4624 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
   4625 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
   4626 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
   4627 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
   4628 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
   4629 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
   4630 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
   4631 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
   4632 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
   4633 
   4634 
   4635 // Advanced SIMD three different.
        // Instructions whose operands do not all share one format. The macro name
        // records the shape used for each instruction (LONG, WIDE, NARROW, plus
        // the LONG_SD and LONG_8H variants); every entry passes the `Basic` input
        // suffix. The macros themselves are defined elsewhere (not in this
        // section).
   4636 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
   4637 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
   4638 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
   4639 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
   4640 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
   4641 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
   4642 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
   4643 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
   4644 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
   4645 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
   4646 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
   4647 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
   4648 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
   4649 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
   4650 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
   4651 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
   4652 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
   4653 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
   4654 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
   4655 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
   4656 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
   4657 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
   4658 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
   4659 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
   4660 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
   4661 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
   4662 
   4663 
   4664 // Advanced SIMD scalar three different.
        // Only the three sqdml* instructions are listed for the scalar form; each
        // passes the `Basic` input suffix.
   4665 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
   4666 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
   4667 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
   4668 
   4669 
   4670 // Advanced SIMD scalar pairwise.
        // `addp` is written out by hand instead of through a DEFINE_TEST_* macro:
        // it calls the 2DIFF helper directly with the D and 2D formats and the
        // 64-bit Basic inputs. The floating-point pairwise instructions below go
        // through DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD with the `Basic` suffix.
   4671 TEST(addp_SCALAR) {
   4672   CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
   4673 }
   4674 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
   4675 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
   4676 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
   4677 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
   4678 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
   4679 
   4680 
   4681 // Advanced SIMD shift by immediate.
        // The third argument names the immediate list to use. In this group the
        // right-shift instructions (e.g. sshr, ssra, sri, shrn, sqshrn) pass
        // TypeWidth, the left-shift instructions (e.g. shl, sli, sqshl, sshll)
        // pass TypeWidthFromZero, and the fixed-point conversions (scvtf, ucvtf,
        // fcvtzs, fcvtzu) pass TypeWidthFromZeroToWidth. The conversions also use
        // the FixedPointConversions / Conversions input suffixes instead of Basic.
   4682 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
   4683 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
   4684 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
   4685 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
   4686 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
   4687 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
   4688 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
   4689 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
   4690 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
   4691 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
   4692 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
   4693 DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
   4694                             FixedPointConversions,
   4695                             TypeWidthFromZeroToWidth)
   4696 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
   4697 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
   4698 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
   4699 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
   4700 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
   4701 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
   4702 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
   4703 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
   4704 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
   4705 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
   4706 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
   4707 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
   4708 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
   4709 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
   4710 DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
   4711                             FixedPointConversions,
   4712                             TypeWidthFromZeroToWidth)
   4713 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
   4714 
   4715 
   4716 // Advanced SIMD scalar shift by immediate.
        // Scalar forms of the shift-by-immediate tests. The immediate-list
        // argument follows the same pattern as the vector group: TypeWidth for
        // right shifts, TypeWidthFromZero for left shifts, and
        // TypeWidthFromZeroToWidth for the fixed-point conversions.
   4717 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
   4718 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
   4719 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
   4720 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
   4721 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
   4722 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
   4723 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
   4724 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
   4725 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
   4726                                    FixedPointConversions,
   4727                                    TypeWidthFromZeroToWidth)
   4728 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
   4729 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
   4730 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
   4731 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
   4732 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
   4733 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
   4734 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
   4735 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
   4736 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
   4737 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
   4738 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
   4739 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
   4740 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
   4741 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
   4742                                    FixedPointConversions,
   4743                                    TypeWidthFromZeroToWidth)
   4744 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
   4745 
   4746 
   4747 // Advanced SIMD two-register miscellaneous.
        // Integer entries pass the `Basic` input suffix; floating-point rounding
        // and conversion entries (frint*, fcvt*) pass `Conversions`. The
        // compare-against-zero forms (cmgt, cmeq, cmlt, cmge, cmle and the fcm*
        // equivalents) are generated through the 2OPIMM macros with the `Zero`
        // immediate suffix, and shll passes its own `SHLL` immediate suffix.
        // The inline notes below record instructions that get no entry of their
        // own because another group already exercises them.
   4748 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
   4749 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
   4750 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
   4751 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
   4752 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
   4753 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
   4754 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
   4755 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
   4756 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
   4757 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
   4758 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
   4759 DEFINE_TEST_NEON_2SAME(abs, Basic)
   4760 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
   4761 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
   4762 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
   4763 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
   4764 DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
   4765 DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
   4766 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
   4767 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
   4768 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
   4769 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
   4770 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
   4771 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
   4772 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
   4773 DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
   4774 DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
   4775 DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
   4776 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
   4777 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
   4778 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
   4779 DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
   4780 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
   4781 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
   4782 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
   4783 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
   4784 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
   4785 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
   4786 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
   4787 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
   4788 DEFINE_TEST_NEON_2SAME(neg, Basic)
   4789 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
   4790 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
   4791 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
   4792 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
   4793 DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
   4794 DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
   4795 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
   4796 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
   4797 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
   4798 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
   4799 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
   4800 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
   4801 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
   4802 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
   4803 DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
   4804 DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
   4805 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
   4806 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
   4807 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
   4808 DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
   4809 DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)
   4810 
   4811 
   4812 // Advanced SIMD scalar two-register miscellaneous.
        // Scalar forms of the two-register miscellaneous tests. As in the vector
        // group, compare-against-zero entries pass the `Zero` immediate suffix and
        // the fcvt* entries pass the `Conversions` input suffix. fcvtxn is written
        // out by hand: it calls the 2DIFF helper directly with the S and D formats
        // and kInputDoubleConversions.
   4813 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
   4814 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
   4815 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
   4816 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
   4817 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
   4818 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
   4819 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
   4820 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
   4821 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
   4822 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
   4823 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
   4824 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
   4825 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
   4826 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
   4827 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
   4828 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
   4829 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
   4830 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
   4831 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
   4832 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
   4833 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
   4834 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
   4835 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
   4836 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
   4837 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
   4838 TEST(fcvtxn_SCALAR) {
   4839   CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
   4840 }
   4841 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
   4842 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
   4843 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
   4844 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
   4845 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
   4846 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
   4847 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
   4848 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
   4849 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)
   4850 
   4851 
   4852 // Advanced SIMD across lanes.
        // Three macro shapes are used here: ACROSS_LONG for saddlv / uaddlv,
        // ACROSS for the integer min / max / add reductions, and ACROSS_FP for the
        // floating-point reductions. Every entry passes the `Basic` input suffix.
   4853 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
   4854 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
   4855 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
   4856 DEFINE_TEST_NEON_ACROSS(addv, Basic)
   4857 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
   4858 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
   4859 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
   4860 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
   4861 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
   4862 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
   4863 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
   4864 
   4865 
   4866 // Advanced SIMD permute.
        // All six permute mnemonics (uzp1/uzp2, trn1/trn2, zip1/zip2) go through the
        // same DEFINE_TEST_NEON_3SAME macro with the `Basic` argument.
        // NOTE(review): `Basic` presumably names the input set - confirm against the
        // macro definition, which is outside this section.
   4867 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
   4868 DEFINE_TEST_NEON_3SAME(trn1, Basic)
   4869 DEFINE_TEST_NEON_3SAME(zip1, Basic)
   4870 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
   4871 DEFINE_TEST_NEON_3SAME(trn2, Basic)
   4872 DEFINE_TEST_NEON_3SAME(zip2, Basic)
   4873 
   4874 
   4875 // Advanced SIMD vector x indexed element.
        // Every invocation passes the mnemonic followed by three `Basic` arguments.
        // NOTE(review): the three trailing arguments presumably name one input set
        // per operand, and the _DIFF / _DOT_PRODUCT / FP_ macro variants presumably
        // select the widening, dot-product and floating-point forms - confirm
        // against the macro definitions, which are outside this section.
   4876 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
   4877 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
   4878 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
   4879 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
   4880 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
   4881 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
   4882 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
   4883 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
   4884 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
   4885 DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
   4886 DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
   4887 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
   4888 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
   4889 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
   4890 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
   4891 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
   4892 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
   4893 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
   4894 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
   4895 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
   4896 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
   4897 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
   4898 
   4899 
   4900 // Advanced SIMD scalar x indexed element.
        // Only the saturating-doubling and floating-point multiplies are listed here,
        // each through the _SCALAR variant of the macro used for its vector form.
        // NOTE(review): the three `Basic` arguments presumably name one input set per
        // operand - confirm against the macro definitions, which are outside this
        // section.
   4901 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
   4902 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
   4903 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
   4904 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
   4905 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
   4906 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
   4907 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
   4908 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
   4909 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
   4910 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
   4911 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
   4912 
   4913 
   4914 #undef __
        // Drop any earlier definition of `__` and rebind it so that `__ Foo(...)`
        // expands to `masm->Foo(...)`: the code generators below receive the
        // assembler as a pointer parameter named `masm`.
   4915 #define __ masm->
   4916 
        // Everything up to the matching #endif is compiled only when the simulator is
        // included, ABI support is available, the language level is at least C++11,
        // and the compiler is either Clang or GCC 4.9.1 or newer.
   4917 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) &&                 \
   4918     defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
   4919     (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
   4920 
   4921 // Generate a function that stores zero to a hard-coded address.
        //
        // `masm` is reset before anything is emitted and finalized afterwards. The
        // host address of `target` is embedded in the generated code as an immediate,
        // so the generated function takes no argument.
        // Returns the start address of `masm`'s buffer, which is where the generated
        // function begins.
   4922 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
   4923   masm->Reset();
   4924 
   4925   UseScratchRegisterScope temps(masm);
   4926   Register temp = temps.AcquireX();
          // Materialize the address of `target` in the X scratch register.
   4927   __ Mov(temp, reinterpret_cast<intptr_t>(target));
          // Store the W-sized zero register, i.e. a 32-bit zero, to that address.
   4928   __ Str(wzr, MemOperand(temp));
   4929   __ Ret();
   4930 
   4931   masm->FinalizeCode();
   4932   return masm->GetBuffer()->GetStartAddress<Instruction*>();
   4933 }
   4934 
   4935 
   4935 // Generate a function that stores its `int32_t` argument to a hard-coded
   4936 // address.
   4937 // In this example and the others below, we use the `abi` object to retrieve
   4938 // argument and return locations even though we could easily hard-code them.
   4939 // This mirrors how more generic (e.g. templated) user code would use these
   4940 // mechanisms.
        //
        // `masm` is reset before anything is emitted and finalized afterwards. The
        // host address of `target` is embedded in the generated code as an immediate.
        // Returns the start address of `masm`'s buffer, which is where the generated
        // function begins.
   4941 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
   4942   masm->Reset();
   4943 
   4944   ABI abi;
          // Register that the ABI assigns to the first (and only) `int32_t` parameter.
   4945   Register input =
   4946       Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
   4947 
   4948   UseScratchRegisterScope temps(masm);
   4949   Register temp = temps.AcquireX();
          // Materialize the address of `target`, then store the argument through it.
   4950   __ Mov(temp, reinterpret_cast<intptr_t>(target));
   4951   __ Str(input, MemOperand(temp));
   4952   __ Ret();
   4953 
   4954   masm->FinalizeCode();
   4955   return masm->GetBuffer()->GetStartAddress<Instruction*>();
   4956 }
   4958 
   4959 
   4959 // A minimal implementation of a `pow` function.
        //
        // The generated function takes one `int64_t` argument and returns it raised
        // to the power `pow`. The exponent is fixed at generation time: the loop
        // below runs in the generator and emits one `Mul` per unit of `pow`, so the
        // generated code contains no loop.
        // Returns the start address of `masm`'s buffer, which is where the generated
        // function begins.
   4960 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
   4961   masm->Reset();
   4962 
   4963   ABI abi;
          // Locations that the ABI assigns to the `int64_t` parameter and return value.
   4964   Register input =
   4965       Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
   4966   Register result =
   4967       Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
   4968   UseScratchRegisterScope temps(masm);
   4969   Register temp = temps.AcquireX();
   4970 
          // Accumulate in the scratch register, starting from 1; with `pow == 0` no
          // `Mul` is emitted and the generated function returns 1.
   4971   __ Mov(temp, 1);
   4972   for (unsigned i = 0; i < pow; i++) {
   4973     __ Mul(temp, temp, input);
   4974   }
   4975   __ Mov(result, temp);
   4976   __ Ret();
   4977 
   4978   masm->FinalizeCode();
   4979   return masm->GetBuffer()->GetStartAddress<Instruction*>();
   4980 }
   4982 
   4983 
        // Generate a function that takes `(float, int64_t, double)` arguments, in
        // that order, and returns their sum as a `double`.
        //
        // Argument and return locations are requested from the `abi` object.
        // `masm` is reset before anything is emitted and finalized afterwards.
        // Returns the start address of `masm`'s buffer, which is where the generated
        // function begins.
   4984 Instruction* GenerateSum(MacroAssembler* masm) {
   4985   masm->Reset();
   4986 
   4987   ABI abi;
          // Parameter locations must be requested in declaration order.
   4988   FPRegister input_1 =
   4989       FPRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
   4990   Register input_2 =
   4991       Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
   4992   FPRegister input_3 =
   4993       FPRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
   4994   FPRegister result =
   4995       FPRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());
   4996 
   4997   UseScratchRegisterScope temps(masm);
   4998   FPRegister temp = temps.AcquireD();
   4999 
          // Widen the `float` argument to double precision, writing the result to the
          // D view of the register it arrived in.
   5000   __ Fcvt(input_1.D(), input_1);
          // Convert the signed integer argument to double in the scratch register.
   5001   __ Scvtf(temp, input_2);
          // temp = input_2 + input_1, then result = temp + input_3.
   5002   __ Fadd(temp, temp, input_1.D());
   5003   __ Fadd(result, temp, input_3);
   5004   __ Ret();
   5005 
   5006   masm->FinalizeCode();
   5007   return masm->GetBuffer()->GetStartAddress<Instruction*>();
   5008 }
   5009 
   5010 
        // Check `simulator.RunFrom` with generated functions of increasing signature
        // complexity: no argument, one argument, one argument plus a return value,
        // and several arguments of mixed types.
        // The same `masm` is passed to every generator; each generator resets it
        // before emitting its code.
        // NOTE(review): `masm` and `simulator` are presumably declared by
        // SETUP_WITH_FEATURES - confirm against the macro definition.
   5011 TEST(RunFrom) {
   5012   SETUP_WITH_FEATURES(CPUFeatures::kFP);
   5013 
   5014   // Run a function returning `void` and taking no argument.
          // `value` starts from a non-zero sentinel so that the check below can only
          // pass if the generated store actually executed.
   5015   int32_t value = 0xbad;
   5016   simulator.RunFrom(GenerateStoreZero(&masm, &value));
   5017   VIXL_CHECK(value == 0);
   5018 
   5019   // Run a function returning `void` and taking one argument.
   5020   int32_t argument = 0xf00d;
   5021   simulator.RunFrom<void, int32_t>(GenerateStoreInput(&masm, &value), argument);
   5022   VIXL_CHECK(value == 0xf00d);
   5023 
   5024   // Run a function taking one argument and returning a value.
          // Expected results: 0xbad^0 == 1, 123^1 == 123 and 2^10 == 1024.
   5025   int64_t res_int64_t;
   5026   res_int64_t =
   5027       simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
   5028   VIXL_CHECK(res_int64_t == 1);
   5029   res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
   5030   VIXL_CHECK(res_int64_t == 123);
   5031   res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
   5032   VIXL_CHECK(res_int64_t == 1024);
   5033 
   5034   // Run a function taking multiple arguments in registers.
          // 1.0, 2 and 3.0 and their sum are exactly representable, so the exact `==`
          // comparison against 6.0 below is safe.
   5035   double res_double =
   5036       simulator.RunFrom<double, float, int64_t, double>(GenerateSum(&masm),
   5037                                                         1.0,
   5038                                                         2,
   5039                                                         3.0);
   5040   VIXL_CHECK(res_double == 6.0);
   5041 }
   5042 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64 && VIXL_HAS_ABI_SUPPORT && C++11 && (Clang || GCC >= 4.9.1)
   5043 
   5044 
   5045 }  // namespace aarch64
   5046 }  // namespace vixl
   5047