Home | History | Annotate | Download | only in aarch64
      1 // Copyright 2015, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #include <cfloat>
     28 #include <cstdio>
     29 
     30 #include "test-runner.h"
     31 #include "test-utils.h"
     32 
     33 #include "aarch64/test-simulator-inputs-aarch64.h"
     34 #include "aarch64/test-simulator-traces-aarch64.h"
     35 #include "aarch64/test-utils-aarch64.h"
     36 
     37 #include "aarch64/macro-assembler-aarch64.h"
     38 #include "aarch64/simulator-aarch64.h"
     39 
     40 namespace vixl {
     41 namespace aarch64 {
     42 
     43 // ==== Simulator Tests ====
     44 //
     45 // These simulator tests check instruction behaviour against a trace taken from
     46 // real AArch64 hardware. The same test code is used to generate the trace; the
     47 // results are printed to stdout when the test is run with
     48 // --generate_test_trace.
     49 //
     50 // The input lists and expected results are stored in test/traces. The expected
     51 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
     52 // test for a new instruction is described at the top of
     53 // test-simulator-traces-aarch64.h.
     54 
     55 #define __ masm.
     56 #define TEST(name) TEST_(AARCH64_SIM_##name)
     57 
     58 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     59 
     60 #define SETUP()                                                                \
     61   MacroAssembler masm;                                                         \
     62   Decoder decoder;                                                             \
     63   Simulator* simulator =                                                       \
     64       Test::run_debugger() ? new Debugger(&decoder) : new Simulator(&decoder); \
     65   simulator->SetColouredTrace(Test::coloured_trace());                         \
     66   simulator->SetInstructionStats(Test::instruction_stats());
     67 
     68 #define START()                         \
     69   masm.Reset();                         \
     70   simulator->ResetState();              \
     71   __ PushCalleeSavedRegisters();        \
     72   if (Test::trace_reg()) {              \
     73     __ Trace(LOG_STATE, TRACE_ENABLE);  \
     74   }                                     \
     75   if (Test::trace_write()) {            \
     76     __ Trace(LOG_WRITE, TRACE_ENABLE);  \
     77   }                                     \
     78   if (Test::trace_sim()) {              \
     79     __ Trace(LOG_DISASM, TRACE_ENABLE); \
     80   }                                     \
     81   if (Test::instruction_stats()) {      \
     82     __ EnableInstrumentation();         \
     83   }
     84 
     85 #define END()                       \
     86   if (Test::instruction_stats()) {  \
     87     __ DisableInstrumentation();    \
     88   }                                 \
     89   __ Trace(LOG_ALL, TRACE_DISABLE); \
     90   __ PopCalleeSavedRegisters();     \
     91   __ Ret();                         \
     92   masm.FinalizeCode()
     93 
     94 #define RUN() \
     95   simulator->RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>())
     96 
     97 #define TEARDOWN() delete simulator;
     98 
     99 #else  // VIXL_INCLUDE_SIMULATOR_AARCH64
    100 
    101 #define SETUP()        \
    102   MacroAssembler masm; \
    103   CPU::SetUp()
    104 
    105 #define START() \
    106   masm.Reset(); \
    107   __ PushCalleeSavedRegisters()
    108 
    109 #define END()                   \
    110   __ PopCalleeSavedRegisters(); \
    111   __ Ret();                     \
    112   masm.FinalizeCode()
    113 
    114 #define RUN()                                                 \
    115   {                                                           \
    116     masm.GetBuffer()->SetExecutable();                        \
    117     ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \
    118                   masm.GetSizeOfCodeGenerated());             \
    119     masm.GetBuffer()->SetWritable();                          \
    120   }
    121 
    122 #define TEARDOWN()
    123 
    124 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
    125 
    126 
    127 // The maximum number of errors to report in detail for each test.
    128 static const unsigned kErrorReportLimit = 8;
    129 
    130 
    131 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
    132 // templated test functions.
    133 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
    134 
    135 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
    136 
    137 
    138 // MacroAssembler member function pointers to pass to the test dispatchers.
    139 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
    140                                                   const FPRegister& fn);
    141 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
    142                                                   const FPRegister& fn,
    143                                                   const FPRegister& fm);
    144 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
    145                                                   const FPRegister& fn,
    146                                                   const FPRegister& fm,
    147                                                   const FPRegister& fa);
    148 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
    149                                                   const FPRegister& fm);
    150 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
    151                                                       double value);
    152 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
    153                                                     const FPRegister& fn);
    154 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
    155                                                       const FPRegister& fn,
    156                                                       int fbits);
    157 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
    158                                                       const Register& rn,
    159                                                       int fbits);
    160 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
    161 //       consolidated into one routine.
    162 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
    163                                                     const VRegister& vn);
    164 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
    165                                                     const VRegister& vn,
    166                                                     const VRegister& vm);
    167 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
    168                                                           const VRegister& vn,
    169                                                           const VRegister& vm,
    170                                                           int vm_index);
    171 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    172     const VRegister& vd, int imm1, const VRegister& vn, int imm2);
    173 
    174 // This helps using the same typename for both the function pointer
    175 // and the array of immediates passed to helper routines.
    176 template <typename T>
    177 class Test2OpImmediateNEONHelper_t {
    178  public:
    179   typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
    180                                            const VRegister& vn,
    181                                            T imm);
    182 };
    183 
    184 
    185 // Maximum number of hex characters required to represent values of either
    186 // templated type.
    187 template <typename Ta, typename Tb>
    188 static unsigned MaxHexCharCount() {
    189   unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb)));
    190   return (count * 8) / 4;
    191 }
    192 
    193 
    194 // Standard test dispatchers.
    195 
    196 
    197 static void Test1Op_Helper(Test1OpFPHelper_t helper,
    198                            uintptr_t inputs,
    199                            unsigned inputs_length,
    200                            uintptr_t results,
    201                            unsigned d_size,
    202                            unsigned n_size) {
    203   VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
    204   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
    205 
    206   SETUP();
    207   START();
    208 
    209   // Roll up the loop to keep the code size down.
    210   Label loop_n;
    211 
    212   Register out = x0;
    213   Register inputs_base = x1;
    214   Register length = w2;
    215   Register index_n = w3;
    216 
    217   const int n_index_shift =
    218       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    219 
    220   FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
    221   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
    222 
    223   __ Mov(out, results);
    224   __ Mov(inputs_base, inputs);
    225   __ Mov(length, inputs_length);
    226 
    227   __ Mov(index_n, 0);
    228   __ Bind(&loop_n);
    229   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
    230 
    231   {
    232     SingleEmissionCheckScope guard(&masm);
    233     (masm.*helper)(fd, fn);
    234   }
    235   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
    236 
    237   __ Add(index_n, index_n, 1);
    238   __ Cmp(index_n, inputs_length);
    239   __ B(lo, &loop_n);
    240 
    241   END();
    242   RUN();
    243   TEARDOWN();
    244 }
    245 
    246 
    247 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    248 // rawbits representations of doubles or floats. This ensures that exact bit
    249 // comparisons can be performed.
    250 template <typename Tn, typename Td>
    251 static void Test1Op(const char* name,
    252                     Test1OpFPHelper_t helper,
    253                     const Tn inputs[],
    254                     unsigned inputs_length,
    255                     const Td expected[],
    256                     unsigned expected_length) {
    257   VIXL_ASSERT(inputs_length > 0);
    258 
    259   const unsigned results_length = inputs_length;
    260   Td* results = new Td[results_length];
    261 
    262   const unsigned d_bits = sizeof(Td) * 8;
    263   const unsigned n_bits = sizeof(Tn) * 8;
    264 
    265   Test1Op_Helper(helper,
    266                  reinterpret_cast<uintptr_t>(inputs),
    267                  inputs_length,
    268                  reinterpret_cast<uintptr_t>(results),
    269                  d_bits,
    270                  n_bits);
    271 
    272   if (Test::generate_test_trace()) {
    273     // Print the results.
    274     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    275     for (unsigned d = 0; d < results_length; d++) {
    276       printf("  0x%0*" PRIx64 ",\n",
    277              d_bits / 4,
    278              static_cast<uint64_t>(results[d]));
    279     }
    280     printf("};\n");
    281     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    282   } else {
    283     // Check the results.
    284     VIXL_CHECK(expected_length == results_length);
    285     unsigned error_count = 0;
    286     unsigned d = 0;
    287     for (unsigned n = 0; n < inputs_length; n++, d++) {
    288       if (results[d] != expected[d]) {
    289         if (++error_count > kErrorReportLimit) continue;
    290 
    291         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
    292                name,
    293                n_bits / 4,
    294                static_cast<uint64_t>(inputs[n]),
    295                name,
    296                rawbits_to_fp(inputs[n]));
    297         printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    298                d_bits / 4,
    299                static_cast<uint64_t>(expected[d]),
    300                rawbits_to_fp(expected[d]));
    301         printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    302                d_bits / 4,
    303                static_cast<uint64_t>(results[d]),
    304                rawbits_to_fp(results[d]));
    305         printf("\n");
    306       }
    307     }
    308     VIXL_ASSERT(d == expected_length);
    309     if (error_count > kErrorReportLimit) {
    310       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    311     }
    312     VIXL_CHECK(error_count == 0);
    313   }
    314   delete[] results;
    315 }
    316 
    317 
    318 static void Test2Op_Helper(Test2OpFPHelper_t helper,
    319                            uintptr_t inputs,
    320                            unsigned inputs_length,
    321                            uintptr_t results,
    322                            unsigned reg_size) {
    323   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    324 
    325   SETUP();
    326   START();
    327 
    328   // Roll up the loop to keep the code size down.
    329   Label loop_n, loop_m;
    330 
    331   Register out = x0;
    332   Register inputs_base = x1;
    333   Register length = w2;
    334   Register index_n = w3;
    335   Register index_m = w4;
    336 
    337   bool double_op = reg_size == kDRegSize;
    338   const int index_shift =
    339       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    340 
    341   FPRegister fd = double_op ? d0 : s0;
    342   FPRegister fn = double_op ? d1 : s1;
    343   FPRegister fm = double_op ? d2 : s2;
    344 
    345   __ Mov(out, results);
    346   __ Mov(inputs_base, inputs);
    347   __ Mov(length, inputs_length);
    348 
    349   __ Mov(index_n, 0);
    350   __ Bind(&loop_n);
    351   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    352 
    353   __ Mov(index_m, 0);
    354   __ Bind(&loop_m);
    355   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    356 
    357   {
    358     SingleEmissionCheckScope guard(&masm);
    359     (masm.*helper)(fd, fn, fm);
    360   }
    361   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
    362 
    363   __ Add(index_m, index_m, 1);
    364   __ Cmp(index_m, inputs_length);
    365   __ B(lo, &loop_m);
    366 
    367   __ Add(index_n, index_n, 1);
    368   __ Cmp(index_n, inputs_length);
    369   __ B(lo, &loop_n);
    370 
    371   END();
    372   RUN();
    373   TEARDOWN();
    374 }
    375 
    376 
    377 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    378 // rawbits representations of doubles or floats. This ensures that exact bit
    379 // comparisons can be performed.
    380 template <typename T>
    381 static void Test2Op(const char* name,
    382                     Test2OpFPHelper_t helper,
    383                     const T inputs[],
    384                     unsigned inputs_length,
    385                     const T expected[],
    386                     unsigned expected_length) {
    387   VIXL_ASSERT(inputs_length > 0);
    388 
    389   const unsigned results_length = inputs_length * inputs_length;
    390   T* results = new T[results_length];
    391 
    392   const unsigned bits = sizeof(T) * 8;
    393 
    394   Test2Op_Helper(helper,
    395                  reinterpret_cast<uintptr_t>(inputs),
    396                  inputs_length,
    397                  reinterpret_cast<uintptr_t>(results),
    398                  bits);
    399 
    400   if (Test::generate_test_trace()) {
    401     // Print the results.
    402     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    403     for (unsigned d = 0; d < results_length; d++) {
    404       printf("  0x%0*" PRIx64 ",\n",
    405              bits / 4,
    406              static_cast<uint64_t>(results[d]));
    407     }
    408     printf("};\n");
    409     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    410   } else {
    411     // Check the results.
    412     VIXL_CHECK(expected_length == results_length);
    413     unsigned error_count = 0;
    414     unsigned d = 0;
    415     for (unsigned n = 0; n < inputs_length; n++) {
    416       for (unsigned m = 0; m < inputs_length; m++, d++) {
    417         if (results[d] != expected[d]) {
    418           if (++error_count > kErrorReportLimit) continue;
    419 
    420           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
    421                  name,
    422                  bits / 4,
    423                  static_cast<uint64_t>(inputs[n]),
    424                  bits / 4,
    425                  static_cast<uint64_t>(inputs[m]),
    426                  name,
    427                  rawbits_to_fp(inputs[n]),
    428                  rawbits_to_fp(inputs[m]));
    429           printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    430                  bits / 4,
    431                  static_cast<uint64_t>(expected[d]),
    432                  rawbits_to_fp(expected[d]));
    433           printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    434                  bits / 4,
    435                  static_cast<uint64_t>(results[d]),
    436                  rawbits_to_fp(results[d]));
    437           printf("\n");
    438         }
    439       }
    440     }
    441     VIXL_ASSERT(d == expected_length);
    442     if (error_count > kErrorReportLimit) {
    443       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    444     }
    445     VIXL_CHECK(error_count == 0);
    446   }
    447   delete[] results;
    448 }
    449 
    450 
    451 static void Test3Op_Helper(Test3OpFPHelper_t helper,
    452                            uintptr_t inputs,
    453                            unsigned inputs_length,
    454                            uintptr_t results,
    455                            unsigned reg_size) {
    456   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    457 
    458   SETUP();
    459   START();
    460 
    461   // Roll up the loop to keep the code size down.
    462   Label loop_n, loop_m, loop_a;
    463 
    464   Register out = x0;
    465   Register inputs_base = x1;
    466   Register length = w2;
    467   Register index_n = w3;
    468   Register index_m = w4;
    469   Register index_a = w5;
    470 
    471   bool double_op = reg_size == kDRegSize;
    472   const int index_shift =
    473       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    474 
    475   FPRegister fd = double_op ? d0 : s0;
    476   FPRegister fn = double_op ? d1 : s1;
    477   FPRegister fm = double_op ? d2 : s2;
    478   FPRegister fa = double_op ? d3 : s3;
    479 
    480   __ Mov(out, results);
    481   __ Mov(inputs_base, inputs);
    482   __ Mov(length, inputs_length);
    483 
    484   __ Mov(index_n, 0);
    485   __ Bind(&loop_n);
    486   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    487 
    488   __ Mov(index_m, 0);
    489   __ Bind(&loop_m);
    490   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    491 
    492   __ Mov(index_a, 0);
    493   __ Bind(&loop_a);
    494   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
    495 
    496   {
    497     SingleEmissionCheckScope guard(&masm);
    498     (masm.*helper)(fd, fn, fm, fa);
    499   }
    500   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
    501 
    502   __ Add(index_a, index_a, 1);
    503   __ Cmp(index_a, inputs_length);
    504   __ B(lo, &loop_a);
    505 
    506   __ Add(index_m, index_m, 1);
    507   __ Cmp(index_m, inputs_length);
    508   __ B(lo, &loop_m);
    509 
    510   __ Add(index_n, index_n, 1);
    511   __ Cmp(index_n, inputs_length);
    512   __ B(lo, &loop_n);
    513 
    514   END();
    515   RUN();
    516   TEARDOWN();
    517 }
    518 
    519 
    520 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    521 // rawbits representations of doubles or floats. This ensures that exact bit
    522 // comparisons can be performed.
    523 template <typename T>
    524 static void Test3Op(const char* name,
    525                     Test3OpFPHelper_t helper,
    526                     const T inputs[],
    527                     unsigned inputs_length,
    528                     const T expected[],
    529                     unsigned expected_length) {
    530   VIXL_ASSERT(inputs_length > 0);
    531 
    532   const unsigned results_length = inputs_length * inputs_length * inputs_length;
    533   T* results = new T[results_length];
    534 
    535   const unsigned bits = sizeof(T) * 8;
    536 
    537   Test3Op_Helper(helper,
    538                  reinterpret_cast<uintptr_t>(inputs),
    539                  inputs_length,
    540                  reinterpret_cast<uintptr_t>(results),
    541                  bits);
    542 
    543   if (Test::generate_test_trace()) {
    544     // Print the results.
    545     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    546     for (unsigned d = 0; d < results_length; d++) {
    547       printf("  0x%0*" PRIx64 ",\n",
    548              bits / 4,
    549              static_cast<uint64_t>(results[d]));
    550     }
    551     printf("};\n");
    552     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    553   } else {
    554     // Check the results.
    555     VIXL_CHECK(expected_length == results_length);
    556     unsigned error_count = 0;
    557     unsigned d = 0;
    558     for (unsigned n = 0; n < inputs_length; n++) {
    559       for (unsigned m = 0; m < inputs_length; m++) {
    560         for (unsigned a = 0; a < inputs_length; a++, d++) {
    561           if (results[d] != expected[d]) {
    562             if (++error_count > kErrorReportLimit) continue;
    563 
    564             printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
    565                    " (%s %g %g %g):\n",
    566                    name,
    567                    bits / 4,
    568                    static_cast<uint64_t>(inputs[n]),
    569                    bits / 4,
    570                    static_cast<uint64_t>(inputs[m]),
    571                    bits / 4,
    572                    static_cast<uint64_t>(inputs[a]),
    573                    name,
    574                    rawbits_to_fp(inputs[n]),
    575                    rawbits_to_fp(inputs[m]),
    576                    rawbits_to_fp(inputs[a]));
    577             printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
    578                    bits / 4,
    579                    static_cast<uint64_t>(expected[d]),
    580                    rawbits_to_fp(expected[d]));
    581             printf("  Found:    0x%0*" PRIx64 " (%g)\n",
    582                    bits / 4,
    583                    static_cast<uint64_t>(results[d]),
    584                    rawbits_to_fp(results[d]));
    585             printf("\n");
    586           }
    587         }
    588       }
    589     }
    590     VIXL_ASSERT(d == expected_length);
    591     if (error_count > kErrorReportLimit) {
    592       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    593     }
    594     VIXL_CHECK(error_count == 0);
    595   }
    596   delete[] results;
    597 }
    598 
    599 
    600 static void TestCmp_Helper(TestFPCmpHelper_t helper,
    601                            uintptr_t inputs,
    602                            unsigned inputs_length,
    603                            uintptr_t results,
    604                            unsigned reg_size) {
    605   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    606 
    607   SETUP();
    608   START();
    609 
    610   // Roll up the loop to keep the code size down.
    611   Label loop_n, loop_m;
    612 
    613   Register out = x0;
    614   Register inputs_base = x1;
    615   Register length = w2;
    616   Register index_n = w3;
    617   Register index_m = w4;
    618   Register flags = x5;
    619 
    620   bool double_op = reg_size == kDRegSize;
    621   const int index_shift =
    622       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    623 
    624   FPRegister fn = double_op ? d1 : s1;
    625   FPRegister fm = double_op ? d2 : s2;
    626 
    627   __ Mov(out, results);
    628   __ Mov(inputs_base, inputs);
    629   __ Mov(length, inputs_length);
    630 
    631   __ Mov(index_n, 0);
    632   __ Bind(&loop_n);
    633   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    634 
    635   __ Mov(index_m, 0);
    636   __ Bind(&loop_m);
    637   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
    638 
    639   {
    640     SingleEmissionCheckScope guard(&masm);
    641     (masm.*helper)(fn, fm);
    642   }
    643   __ Mrs(flags, NZCV);
    644   __ Ubfx(flags, flags, 28, 4);
    645   __ Strb(flags, MemOperand(out, 1, PostIndex));
    646 
    647   __ Add(index_m, index_m, 1);
    648   __ Cmp(index_m, inputs_length);
    649   __ B(lo, &loop_m);
    650 
    651   __ Add(index_n, index_n, 1);
    652   __ Cmp(index_n, inputs_length);
    653   __ B(lo, &loop_n);
    654 
    655   END();
    656   RUN();
    657   TEARDOWN();
    658 }
    659 
    660 
    661 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    662 // rawbits representations of doubles or floats. This ensures that exact bit
    663 // comparisons can be performed.
    664 template <typename T>
    665 static void TestCmp(const char* name,
    666                     TestFPCmpHelper_t helper,
    667                     const T inputs[],
    668                     unsigned inputs_length,
    669                     const uint8_t expected[],
    670                     unsigned expected_length) {
    671   VIXL_ASSERT(inputs_length > 0);
    672 
    673   const unsigned results_length = inputs_length * inputs_length;
    674   uint8_t* results = new uint8_t[results_length];
    675 
    676   const unsigned bits = sizeof(T) * 8;
    677 
    678   TestCmp_Helper(helper,
    679                  reinterpret_cast<uintptr_t>(inputs),
    680                  inputs_length,
    681                  reinterpret_cast<uintptr_t>(results),
    682                  bits);
    683 
    684   if (Test::generate_test_trace()) {
    685     // Print the results.
    686     printf("const uint8_t kExpected_%s[] = {\n", name);
    687     for (unsigned d = 0; d < results_length; d++) {
    688       // Each NZCV result only requires 4 bits.
    689       VIXL_ASSERT((results[d] & 0xf) == results[d]);
    690       printf("  0x%" PRIx8 ",\n", results[d]);
    691     }
    692     printf("};\n");
    693     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    694   } else {
    695     // Check the results.
    696     VIXL_CHECK(expected_length == results_length);
    697     unsigned error_count = 0;
    698     unsigned d = 0;
    699     for (unsigned n = 0; n < inputs_length; n++) {
    700       for (unsigned m = 0; m < inputs_length; m++, d++) {
    701         if (results[d] != expected[d]) {
    702           if (++error_count > kErrorReportLimit) continue;
    703 
    704           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
    705                  name,
    706                  bits / 4,
    707                  static_cast<uint64_t>(inputs[n]),
    708                  bits / 4,
    709                  static_cast<uint64_t>(inputs[m]),
    710                  name,
    711                  rawbits_to_fp(inputs[n]),
    712                  rawbits_to_fp(inputs[m]));
    713           printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
    714                  (expected[d] & 0x8) ? 'N' : 'n',
    715                  (expected[d] & 0x4) ? 'Z' : 'z',
    716                  (expected[d] & 0x2) ? 'C' : 'c',
    717                  (expected[d] & 0x1) ? 'V' : 'v',
    718                  expected[d]);
    719           printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
    720                  (results[d] & 0x8) ? 'N' : 'n',
    721                  (results[d] & 0x4) ? 'Z' : 'z',
    722                  (results[d] & 0x2) ? 'C' : 'c',
    723                  (results[d] & 0x1) ? 'V' : 'v',
    724                  results[d]);
    725           printf("\n");
    726         }
    727       }
    728     }
    729     VIXL_ASSERT(d == expected_length);
    730     if (error_count > kErrorReportLimit) {
    731       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    732     }
    733     VIXL_CHECK(error_count == 0);
    734   }
    735   delete[] results;
    736 }
    737 
    738 
    739 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
    740                                uintptr_t inputs,
    741                                unsigned inputs_length,
    742                                uintptr_t results,
    743                                unsigned reg_size) {
    744   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
    745 
    746   SETUP();
    747   START();
    748 
    749   // Roll up the loop to keep the code size down.
    750   Label loop_n, loop_m;
    751 
    752   Register out = x0;
    753   Register inputs_base = x1;
    754   Register length = w2;
    755   Register index_n = w3;
    756   Register flags = x4;
    757 
    758   bool double_op = reg_size == kDRegSize;
    759   const int index_shift =
    760       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    761 
    762   FPRegister fn = double_op ? d1 : s1;
    763 
    764   __ Mov(out, results);
    765   __ Mov(inputs_base, inputs);
    766   __ Mov(length, inputs_length);
    767 
    768   __ Mov(index_n, 0);
    769   __ Bind(&loop_n);
    770   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
    771 
    772   {
    773     SingleEmissionCheckScope guard(&masm);
    774     (masm.*helper)(fn, 0.0);
    775   }
    776   __ Mrs(flags, NZCV);
    777   __ Ubfx(flags, flags, 28, 4);
    778   __ Strb(flags, MemOperand(out, 1, PostIndex));
    779 
    780   __ Add(index_n, index_n, 1);
    781   __ Cmp(index_n, inputs_length);
    782   __ B(lo, &loop_n);
    783 
    784   END();
    785   RUN();
    786   TEARDOWN();
    787 }
    788 
    789 
    790 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
    791 // rawbits representations of doubles or floats. This ensures that exact bit
    792 // comparisons can be performed.
    793 template <typename T>
    794 static void TestCmpZero(const char* name,
    795                         TestFPCmpZeroHelper_t helper,
    796                         const T inputs[],
    797                         unsigned inputs_length,
    798                         const uint8_t expected[],
    799                         unsigned expected_length) {
    800   VIXL_ASSERT(inputs_length > 0);
    801 
    802   const unsigned results_length = inputs_length;
    803   uint8_t* results = new uint8_t[results_length];
    804 
    805   const unsigned bits = sizeof(T) * 8;
    806 
    807   TestCmpZero_Helper(helper,
    808                      reinterpret_cast<uintptr_t>(inputs),
    809                      inputs_length,
    810                      reinterpret_cast<uintptr_t>(results),
    811                      bits);
    812 
    813   if (Test::generate_test_trace()) {
    814     // Print the results.
    815     printf("const uint8_t kExpected_%s[] = {\n", name);
    816     for (unsigned d = 0; d < results_length; d++) {
    817       // Each NZCV result only requires 4 bits.
    818       VIXL_ASSERT((results[d] & 0xf) == results[d]);
    819       printf("  0x%" PRIx8 ",\n", results[d]);
    820     }
    821     printf("};\n");
    822     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
    823   } else {
    824     // Check the results.
    825     VIXL_CHECK(expected_length == results_length);
    826     unsigned error_count = 0;
    827     unsigned d = 0;
    828     for (unsigned n = 0; n < inputs_length; n++, d++) {
    829       if (results[d] != expected[d]) {
    830         if (++error_count > kErrorReportLimit) continue;
    831 
    832         printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
    833                name,
    834                bits / 4,
    835                static_cast<uint64_t>(inputs[n]),
    836                bits / 4,
    837                0,
    838                name,
    839                rawbits_to_fp(inputs[n]));
    840         printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
    841                (expected[d] & 0x8) ? 'N' : 'n',
    842                (expected[d] & 0x4) ? 'Z' : 'z',
    843                (expected[d] & 0x2) ? 'C' : 'c',
    844                (expected[d] & 0x1) ? 'V' : 'v',
    845                expected[d]);
    846         printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
    847                (results[d] & 0x8) ? 'N' : 'n',
    848                (results[d] & 0x4) ? 'Z' : 'z',
    849                (results[d] & 0x2) ? 'C' : 'c',
    850                (results[d] & 0x1) ? 'V' : 'v',
    851                results[d]);
    852         printf("\n");
    853       }
    854     }
    855     VIXL_ASSERT(d == expected_length);
    856     if (error_count > kErrorReportLimit) {
    857       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    858     }
    859     VIXL_CHECK(error_count == 0);
    860   }
    861   delete[] results;
    862 }
    863 
    864 
    865 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
    866                                  uintptr_t inputs,
    867                                  unsigned inputs_length,
    868                                  uintptr_t results,
    869                                  unsigned d_size,
    870                                  unsigned n_size) {
    871   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
    872   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
    873 
    874   SETUP();
    875   START();
    876 
    877   // Roll up the loop to keep the code size down.
    878   Label loop_n;
    879 
    880   Register out = x0;
    881   Register inputs_base = x1;
    882   Register length = w2;
    883   Register index_n = w3;
    884 
    885   const int n_index_shift =
    886       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    887 
    888   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
    889   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
    890 
    891   __ Mov(out, results);
    892   __ Mov(inputs_base, inputs);
    893   __ Mov(length, inputs_length);
    894 
    895   __ Mov(index_n, 0);
    896   __ Bind(&loop_n);
    897   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
    898 
    899   for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
    900     {
    901       SingleEmissionCheckScope guard(&masm);
    902       (masm.*helper)(rd, fn, fbits);
    903     }
    904     __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
    905   }
    906 
    907   __ Add(index_n, index_n, 1);
    908   __ Cmp(index_n, inputs_length);
    909   __ B(lo, &loop_n);
    910 
    911   END();
    912   RUN();
    913   TEARDOWN();
    914 }
    915 
    916 
    917 static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
    918                                uintptr_t inputs,
    919                                unsigned inputs_length,
    920                                uintptr_t results,
    921                                unsigned d_size,
    922                                unsigned n_size) {
    923   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
    924   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
    925 
    926   SETUP();
    927   START();
    928 
    929   // Roll up the loop to keep the code size down.
    930   Label loop_n;
    931 
    932   Register out = x0;
    933   Register inputs_base = x1;
    934   Register length = w2;
    935   Register index_n = w3;
    936 
    937   const int n_index_shift =
    938       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
    939 
    940   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
    941   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
    942 
    943   __ Mov(out, results);
    944   __ Mov(inputs_base, inputs);
    945   __ Mov(length, inputs_length);
    946 
    947   __ Mov(index_n, 0);
    948   __ Bind(&loop_n);
    949   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
    950 
    951   {
    952     SingleEmissionCheckScope guard(&masm);
    953     (masm.*helper)(rd, fn);
    954   }
    955   __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
    956 
    957   __ Add(index_n, index_n, 1);
    958   __ Cmp(index_n, inputs_length);
    959   __ B(lo, &loop_n);
    960 
    961   END();
    962   RUN();
    963   TEARDOWN();
    964 }
    965 
    966 
    967 // Test FP instructions.
    968 //  - The inputs[] array should be an array of rawbits representations of
    969 //    doubles or floats. This ensures that exact bit comparisons can be
    970 //    performed.
    971 //  - The expected[] array should be an array of signed integers.
    972 template <typename Tn, typename Td>
    973 static void TestFPToS(const char* name,
    974                       TestFPToIntHelper_t helper,
    975                       const Tn inputs[],
    976                       unsigned inputs_length,
    977                       const Td expected[],
    978                       unsigned expected_length) {
    979   VIXL_ASSERT(inputs_length > 0);
    980 
    981   const unsigned results_length = inputs_length;
    982   Td* results = new Td[results_length];
    983 
    984   const unsigned d_bits = sizeof(Td) * 8;
    985   const unsigned n_bits = sizeof(Tn) * 8;
    986 
    987   TestFPToInt_Helper(helper,
    988                      reinterpret_cast<uintptr_t>(inputs),
    989                      inputs_length,
    990                      reinterpret_cast<uintptr_t>(results),
    991                      d_bits,
    992                      n_bits);
    993 
    994   if (Test::generate_test_trace()) {
    995     // Print the results.
    996     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    997     // There is no simple C++ literal for INT*_MIN that doesn't produce
    998     // warnings, so we use an appropriate constant in that case instead.
    999     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
   1000     // the like) avoids warnings about comparing values with differing ranges.
   1001     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
   1002     const int64_t int_d_min = -(int_d_max)-1;
   1003     for (unsigned d = 0; d < results_length; d++) {
   1004       if (results[d] == int_d_min) {
   1005         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
   1006       } else {
   1007         // Some constants (such as those between INT32_MAX and UINT32_MAX)
   1008         // trigger compiler warnings. To avoid these warnings, use an
   1009         // appropriate macro to make the type explicit.
   1010         int64_t result_int64 = static_cast<int64_t>(results[d]);
   1011         if (result_int64 >= 0) {
   1012           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
   1013         } else {
   1014           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
   1015         }
   1016       }
   1017     }
   1018     printf("};\n");
   1019     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1020   } else {
   1021     // Check the results.
   1022     VIXL_CHECK(expected_length == results_length);
   1023     unsigned error_count = 0;
   1024     unsigned d = 0;
   1025     for (unsigned n = 0; n < inputs_length; n++, d++) {
   1026       if (results[d] != expected[d]) {
   1027         if (++error_count > kErrorReportLimit) continue;
   1028 
   1029         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
   1030                name,
   1031                n_bits / 4,
   1032                static_cast<uint64_t>(inputs[n]),
   1033                name,
   1034                rawbits_to_fp(inputs[n]));
   1035         printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
   1036                d_bits / 4,
   1037                static_cast<uint64_t>(expected[d]),
   1038                static_cast<int64_t>(expected[d]));
   1039         printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
   1040                d_bits / 4,
   1041                static_cast<uint64_t>(results[d]),
   1042                static_cast<int64_t>(results[d]));
   1043         printf("\n");
   1044       }
   1045     }
   1046     VIXL_ASSERT(d == expected_length);
   1047     if (error_count > kErrorReportLimit) {
   1048       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1049     }
   1050     VIXL_CHECK(error_count == 0);
   1051   }
   1052   delete[] results;
   1053 }
   1054 
   1055 
   1056 // Test FP instructions.
   1057 //  - The inputs[] array should be an array of rawbits representations of
   1058 //    doubles or floats. This ensures that exact bit comparisons can be
   1059 //    performed.
   1060 //  - The expected[] array should be an array of unsigned integers.
   1061 template <typename Tn, typename Td>
   1062 static void TestFPToU(const char* name,
   1063                       TestFPToIntHelper_t helper,
   1064                       const Tn inputs[],
   1065                       unsigned inputs_length,
   1066                       const Td expected[],
   1067                       unsigned expected_length) {
   1068   VIXL_ASSERT(inputs_length > 0);
   1069 
   1070   const unsigned results_length = inputs_length;
   1071   Td* results = new Td[results_length];
   1072 
   1073   const unsigned d_bits = sizeof(Td) * 8;
   1074   const unsigned n_bits = sizeof(Tn) * 8;
   1075 
   1076   TestFPToInt_Helper(helper,
   1077                      reinterpret_cast<uintptr_t>(inputs),
   1078                      inputs_length,
   1079                      reinterpret_cast<uintptr_t>(results),
   1080                      d_bits,
   1081                      n_bits);
   1082 
   1083   if (Test::generate_test_trace()) {
   1084     // Print the results.
   1085     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
   1086     for (unsigned d = 0; d < results_length; d++) {
   1087       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
   1088     }
   1089     printf("};\n");
   1090     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1091   } else {
   1092     // Check the results.
   1093     VIXL_CHECK(expected_length == results_length);
   1094     unsigned error_count = 0;
   1095     unsigned d = 0;
   1096     for (unsigned n = 0; n < inputs_length; n++, d++) {
   1097       if (results[d] != expected[d]) {
   1098         if (++error_count > kErrorReportLimit) continue;
   1099 
   1100         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
   1101                name,
   1102                n_bits / 4,
   1103                static_cast<uint64_t>(inputs[n]),
   1104                name,
   1105                rawbits_to_fp(inputs[n]));
   1106         printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1107                d_bits / 4,
   1108                static_cast<uint64_t>(expected[d]),
   1109                static_cast<uint64_t>(expected[d]));
   1110         printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1111                d_bits / 4,
   1112                static_cast<uint64_t>(results[d]),
   1113                static_cast<uint64_t>(results[d]));
   1114         printf("\n");
   1115       }
   1116     }
   1117     VIXL_ASSERT(d == expected_length);
   1118     if (error_count > kErrorReportLimit) {
   1119       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1120     }
   1121     VIXL_CHECK(error_count == 0);
   1122   }
   1123   delete[] results;
   1124 }
   1125 
   1126 
   1127 // Test FP instructions.
   1128 //  - The inputs[] array should be an array of rawbits representations of
   1129 //    doubles or floats. This ensures that exact bit comparisons can be
   1130 //    performed.
   1131 //  - The expected[] array should be an array of signed integers.
   1132 template <typename Tn, typename Td>
   1133 static void TestFPToFixedS(const char* name,
   1134                            TestFPToFixedHelper_t helper,
   1135                            const Tn inputs[],
   1136                            unsigned inputs_length,
   1137                            const Td expected[],
   1138                            unsigned expected_length) {
   1139   VIXL_ASSERT(inputs_length > 0);
   1140 
   1141   const unsigned d_bits = sizeof(Td) * 8;
   1142   const unsigned n_bits = sizeof(Tn) * 8;
   1143 
   1144   const unsigned results_length = inputs_length * (d_bits + 1);
   1145   Td* results = new Td[results_length];
   1146 
   1147   TestFPToFixed_Helper(helper,
   1148                        reinterpret_cast<uintptr_t>(inputs),
   1149                        inputs_length,
   1150                        reinterpret_cast<uintptr_t>(results),
   1151                        d_bits,
   1152                        n_bits);
   1153 
   1154   if (Test::generate_test_trace()) {
   1155     // Print the results.
   1156     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
   1157     // There is no simple C++ literal for INT*_MIN that doesn't produce
   1158     // warnings, so we use an appropriate constant in that case instead.
   1159     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
   1160     // the like) avoids warnings about comparing values with differing ranges.
   1161     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
   1162     const int64_t int_d_min = -(int_d_max)-1;
   1163     for (unsigned d = 0; d < results_length; d++) {
   1164       if (results[d] == int_d_min) {
   1165         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
   1166       } else {
   1167         // Some constants (such as those between INT32_MAX and UINT32_MAX)
   1168         // trigger compiler warnings. To avoid these warnings, use an
   1169         // appropriate macro to make the type explicit.
   1170         int64_t result_int64 = static_cast<int64_t>(results[d]);
   1171         if (result_int64 >= 0) {
   1172           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
   1173         } else {
   1174           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
   1175         }
   1176       }
   1177     }
   1178     printf("};\n");
   1179     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1180   } else {
   1181     // Check the results.
   1182     VIXL_CHECK(expected_length == results_length);
   1183     unsigned error_count = 0;
   1184     unsigned d = 0;
   1185     for (unsigned n = 0; n < inputs_length; n++) {
   1186       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
   1187         if (results[d] != expected[d]) {
   1188           if (++error_count > kErrorReportLimit) continue;
   1189 
   1190           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
   1191                  name,
   1192                  n_bits / 4,
   1193                  static_cast<uint64_t>(inputs[n]),
   1194                  fbits,
   1195                  name,
   1196                  rawbits_to_fp(inputs[n]),
   1197                  fbits);
   1198           printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
   1199                  d_bits / 4,
   1200                  static_cast<uint64_t>(expected[d]),
   1201                  static_cast<int64_t>(expected[d]));
   1202           printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
   1203                  d_bits / 4,
   1204                  static_cast<uint64_t>(results[d]),
   1205                  static_cast<int64_t>(results[d]));
   1206           printf("\n");
   1207         }
   1208       }
   1209     }
   1210     VIXL_ASSERT(d == expected_length);
   1211     if (error_count > kErrorReportLimit) {
   1212       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1213     }
   1214     VIXL_CHECK(error_count == 0);
   1215   }
   1216   delete[] results;
   1217 }
   1218 
   1219 
   1220 // Test FP instructions.
   1221 //  - The inputs[] array should be an array of rawbits representations of
   1222 //    doubles or floats. This ensures that exact bit comparisons can be
   1223 //    performed.
   1224 //  - The expected[] array should be an array of unsigned integers.
   1225 template <typename Tn, typename Td>
   1226 static void TestFPToFixedU(const char* name,
   1227                            TestFPToFixedHelper_t helper,
   1228                            const Tn inputs[],
   1229                            unsigned inputs_length,
   1230                            const Td expected[],
   1231                            unsigned expected_length) {
   1232   VIXL_ASSERT(inputs_length > 0);
   1233 
   1234   const unsigned d_bits = sizeof(Td) * 8;
   1235   const unsigned n_bits = sizeof(Tn) * 8;
   1236 
   1237   const unsigned results_length = inputs_length * (d_bits + 1);
   1238   Td* results = new Td[results_length];
   1239 
   1240   TestFPToFixed_Helper(helper,
   1241                        reinterpret_cast<uintptr_t>(inputs),
   1242                        inputs_length,
   1243                        reinterpret_cast<uintptr_t>(results),
   1244                        d_bits,
   1245                        n_bits);
   1246 
   1247   if (Test::generate_test_trace()) {
   1248     // Print the results.
   1249     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
   1250     for (unsigned d = 0; d < results_length; d++) {
   1251       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
   1252     }
   1253     printf("};\n");
   1254     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
   1255   } else {
   1256     // Check the results.
   1257     VIXL_CHECK(expected_length == results_length);
   1258     unsigned error_count = 0;
   1259     unsigned d = 0;
   1260     for (unsigned n = 0; n < inputs_length; n++) {
   1261       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
   1262         if (results[d] != expected[d]) {
   1263           if (++error_count > kErrorReportLimit) continue;
   1264 
   1265           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
   1266                  name,
   1267                  n_bits / 4,
   1268                  static_cast<uint64_t>(inputs[n]),
   1269                  fbits,
   1270                  name,
   1271                  rawbits_to_fp(inputs[n]),
   1272                  fbits);
   1273           printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1274                  d_bits / 4,
   1275                  static_cast<uint64_t>(expected[d]),
   1276                  static_cast<uint64_t>(expected[d]));
   1277           printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
   1278                  d_bits / 4,
   1279                  static_cast<uint64_t>(results[d]),
   1280                  static_cast<uint64_t>(results[d]));
   1281           printf("\n");
   1282         }
   1283       }
   1284     }
   1285     VIXL_ASSERT(d == expected_length);
   1286     if (error_count > kErrorReportLimit) {
   1287       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1288     }
   1289     VIXL_CHECK(error_count == 0);
   1290   }
   1291   delete[] results;
   1292 }
   1293 
   1294 
   1295 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
   1296 
   1297 
   1298 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
   1299                                uintptr_t inputs_n,
   1300                                unsigned inputs_n_length,
   1301                                uintptr_t results,
   1302                                VectorFormat vd_form,
   1303                                VectorFormat vn_form) {
   1304   VIXL_ASSERT(vd_form != kFormatUndefined);
   1305   VIXL_ASSERT(vn_form != kFormatUndefined);
   1306 
   1307   SETUP();
   1308   START();
   1309 
   1310   // Roll up the loop to keep the code size down.
   1311   Label loop_n;
   1312 
   1313   Register out = x0;
   1314   Register inputs_n_base = x1;
   1315   Register inputs_n_last_16bytes = x3;
   1316   Register index_n = x5;
   1317 
   1318   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1319   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1320   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1321 
   1322   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1323   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1324   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1325   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1326   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1327 
   1328 
   1329   // These will be either a D- or a Q-register form, with a single lane
   1330   // (for use in scalar load and store operations).
   1331   VRegister vd = VRegister(0, vd_bits);
   1332   VRegister vn = v1.V16B();
   1333   VRegister vntmp = v3.V16B();
   1334 
   1335   // These will have the correct format for use when calling 'helper'.
   1336   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
   1337   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1338 
   1339   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1340   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1341 
   1342   __ Mov(out, results);
   1343 
   1344   __ Mov(inputs_n_base, inputs_n);
   1345   __ Mov(inputs_n_last_16bytes,
   1346          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
   1347 
   1348   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   1349 
   1350   __ Mov(index_n, 0);
   1351   __ Bind(&loop_n);
   1352 
   1353   __ Ldr(vntmp_single,
   1354          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   1355   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   1356 
   1357   // Set the destination to zero.
   1358   // TODO: Setting the destination to values other than zero
   1359   //       might be a better test for instructions such as sqxtn2
   1360   //       which may leave parts of V registers unchanged.
   1361   __ Movi(vd.V16B(), 0);
   1362 
   1363   {
   1364     SingleEmissionCheckScope guard(&masm);
   1365     (masm.*helper)(vd_helper, vn_helper);
   1366   }
   1367   __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   1368 
   1369   __ Add(index_n, index_n, 1);
   1370   __ Cmp(index_n, inputs_n_length);
   1371   __ B(lo, &loop_n);
   1372 
   1373   END();
   1374   RUN();
   1375   TEARDOWN();
   1376 }
   1377 
   1378 
   1379 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1380 // arrays of rawbit representation of input values. This ensures that
   1381 // exact bit comparisons can be performed.
   1382 template <typename Td, typename Tn>
   1383 static void Test1OpNEON(const char* name,
   1384                         Test1OpNEONHelper_t helper,
   1385                         const Tn inputs_n[],
   1386                         unsigned inputs_n_length,
   1387                         const Td expected[],
   1388                         unsigned expected_length,
   1389                         VectorFormat vd_form,
   1390                         VectorFormat vn_form) {
   1391   VIXL_ASSERT(inputs_n_length > 0);
   1392 
   1393   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1394   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1395   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1396 
   1397   const unsigned results_length = inputs_n_length;
   1398   Td* results = new Td[results_length * vd_lane_count];
   1399   const unsigned lane_bit = sizeof(Td) * 8;
   1400   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   1401 
   1402   Test1OpNEON_Helper(helper,
   1403                      reinterpret_cast<uintptr_t>(inputs_n),
   1404                      inputs_n_length,
   1405                      reinterpret_cast<uintptr_t>(results),
   1406                      vd_form,
   1407                      vn_form);
   1408 
   1409   if (Test::generate_test_trace()) {
   1410     // Print the results.
   1411     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1412     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1413       printf(" ");
   1414       // Output a separate result for each element of the result vector.
   1415       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1416         unsigned index = lane + (iteration * vd_lane_count);
   1417         printf(" 0x%0*" PRIx64 ",",
   1418                lane_len_in_hex,
   1419                static_cast<uint64_t>(results[index]));
   1420       }
   1421       printf("\n");
   1422     }
   1423 
   1424     printf("};\n");
   1425     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1426            name,
   1427            results_length);
   1428   } else {
   1429     // Check the results.
   1430     VIXL_CHECK(expected_length == results_length);
   1431     unsigned error_count = 0;
   1432     unsigned d = 0;
   1433     const char* padding = "                    ";
   1434     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1435     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
   1436       bool error_in_vector = false;
   1437 
   1438       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1439         unsigned output_index = (n * vd_lane_count) + lane;
   1440 
   1441         if (results[output_index] != expected[output_index]) {
   1442           error_in_vector = true;
   1443           break;
   1444         }
   1445       }
   1446 
   1447       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1448         printf("%s\n", name);
   1449         printf(" Vn%.*s| Vd%.*s| Expected\n",
   1450                lane_len_in_hex + 1,
   1451                padding,
   1452                lane_len_in_hex + 1,
   1453                padding);
   1454 
   1455         const unsigned first_index_n =
   1456             inputs_n_length - (16 / vn_lane_bytes) + n + 1;
   1457 
   1458         for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
   1459              lane++) {
   1460           unsigned output_index = (n * vd_lane_count) + lane;
   1461           unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
   1462 
   1463           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
   1464                  " "
   1465                  "| 0x%0*" PRIx64 "\n",
   1466                  results[output_index] != expected[output_index] ? '*' : ' ',
   1467                  lane_len_in_hex,
   1468                  static_cast<uint64_t>(inputs_n[input_index_n]),
   1469                  lane_len_in_hex,
   1470                  static_cast<uint64_t>(results[output_index]),
   1471                  lane_len_in_hex,
   1472                  static_cast<uint64_t>(expected[output_index]));
   1473         }
   1474       }
   1475     }
   1476     VIXL_ASSERT(d == expected_length);
   1477     if (error_count > kErrorReportLimit) {
   1478       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1479     }
   1480     VIXL_CHECK(error_count == 0);
   1481   }
   1482   delete[] results;
   1483 }
   1484 
   1485 
   1486 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
   1487 //      where <V> is one of B, H, S or D registers.
   1488 //      e.g. saddlv H1, v0.8B
   1489 
   1490 // TODO: Change tests to store all lanes of the resulting V register.
   1491 //       Some tests store all 128 bits of the resulting V register to
   1492 //       check the simulator's behaviour on the rest of the register.
   1493 //       This is better than storing the affected lanes only.
   1494 //       Change any tests such as the 'Across' template to do the same.
   1495 
   1496 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
   1497                                      uintptr_t inputs_n,
   1498                                      unsigned inputs_n_length,
   1499                                      uintptr_t results,
   1500                                      VectorFormat vd_form,
   1501                                      VectorFormat vn_form) {
   1502   VIXL_ASSERT(vd_form != kFormatUndefined);
   1503   VIXL_ASSERT(vn_form != kFormatUndefined);
   1504 
   1505   SETUP();
   1506   START();
   1507 
   1508   // Roll up the loop to keep the code size down.
   1509   Label loop_n;
   1510 
   1511   Register out = x0;
   1512   Register inputs_n_base = x1;
   1513   Register inputs_n_last_vector = x3;
   1514   Register index_n = x5;
   1515 
   1516   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1517   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1518   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1519   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1520   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1521   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1522   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1523 
   1524   // Test destructive operations by (arbitrarily) using the same register for
   1525   // B and S lane sizes.
   1526   bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
   1527 
   1528   // Create two aliases for v0; the first is the destination for the tested
   1529   // instruction, the second, the whole Q register to check the results.
   1530   VRegister vd = VRegister(0, vd_bits);
   1531   VRegister vdstr = VRegister(0, kQRegSize);
   1532 
   1533   VRegister vn = VRegister(1, vn_bits);
   1534   VRegister vntmp = VRegister(3, vn_bits);
   1535 
   1536   // These will have the correct format for use when calling 'helper'.
   1537   VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
   1538   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1539 
   1540   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1541   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1542 
   1543   // Same registers for use in the 'ext' instructions.
   1544   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
   1545   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
   1546 
   1547   __ Mov(out, results);
   1548 
   1549   __ Mov(inputs_n_base, inputs_n);
   1550   __ Mov(inputs_n_last_vector,
   1551          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
   1552 
   1553   __ Ldr(vn, MemOperand(inputs_n_last_vector));
   1554 
   1555   __ Mov(index_n, 0);
   1556   __ Bind(&loop_n);
   1557 
   1558   __ Ldr(vntmp_single,
   1559          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   1560   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
   1561 
   1562   if (destructive) {
   1563     __ Mov(vd_helper, vn_helper);
   1564     SingleEmissionCheckScope guard(&masm);
   1565     (masm.*helper)(vd, vd_helper);
   1566   } else {
   1567     SingleEmissionCheckScope guard(&masm);
   1568     (masm.*helper)(vd, vn_helper);
   1569   }
   1570 
   1571   __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
   1572 
   1573   __ Add(index_n, index_n, 1);
   1574   __ Cmp(index_n, inputs_n_length);
   1575   __ B(lo, &loop_n);
   1576 
   1577   END();
   1578   RUN();
   1579   TEARDOWN();
   1580 }
   1581 
   1582 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1583 // arrays of rawbit representation of input values. This ensures that
   1584 // exact bit comparisons can be performed.
   1585 template <typename Td, typename Tn>
   1586 static void Test1OpAcrossNEON(const char* name,
   1587                               Test1OpNEONHelper_t helper,
   1588                               const Tn inputs_n[],
   1589                               unsigned inputs_n_length,
   1590                               const Td expected[],
   1591                               unsigned expected_length,
   1592                               VectorFormat vd_form,
   1593                               VectorFormat vn_form) {
   1594   VIXL_ASSERT(inputs_n_length > 0);
   1595 
   1596   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1597   const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
   1598 
   1599   const unsigned results_length = inputs_n_length;
   1600   Td* results = new Td[results_length * vd_lanes_per_q];
   1601   const unsigned lane_bit = sizeof(Td) * 8;
   1602   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   1603 
   1604   Test1OpAcrossNEON_Helper(helper,
   1605                            reinterpret_cast<uintptr_t>(inputs_n),
   1606                            inputs_n_length,
   1607                            reinterpret_cast<uintptr_t>(results),
   1608                            vd_form,
   1609                            vn_form);
   1610 
   1611   if (Test::generate_test_trace()) {
   1612     // Print the results.
   1613     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1614     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1615       printf(" ");
   1616       // Output a separate result for each element of the result vector.
   1617       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1618         unsigned index = lane + (iteration * vd_lane_count);
   1619         printf(" 0x%0*" PRIx64 ",",
   1620                lane_len_in_hex,
   1621                static_cast<uint64_t>(results[index]));
   1622       }
   1623       printf("\n");
   1624     }
   1625 
   1626     printf("};\n");
   1627     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1628            name,
   1629            results_length);
   1630   } else {
   1631     // Check the results.
   1632     VIXL_CHECK(expected_length == results_length);
   1633     unsigned error_count = 0;
   1634     unsigned d = 0;
   1635     const char* padding = "                    ";
   1636     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1637     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
   1638       bool error_in_vector = false;
   1639 
   1640       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1641         unsigned expected_index = (n * vd_lane_count) + lane;
   1642         unsigned results_index = (n * vd_lanes_per_q) + lane;
   1643 
   1644         if (results[results_index] != expected[expected_index]) {
   1645           error_in_vector = true;
   1646           break;
   1647         }
   1648       }
   1649 
   1650       // For across operations, the remaining lanes should be zero.
   1651       for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
   1652         unsigned results_index = (n * vd_lanes_per_q) + lane;
   1653         if (results[results_index] != 0) {
   1654           error_in_vector = true;
   1655           break;
   1656         }
   1657       }
   1658 
   1659       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1660         const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1661 
   1662         printf("%s\n", name);
   1663         printf(" Vn%.*s| Vd%.*s| Expected\n",
   1664                lane_len_in_hex + 1,
   1665                padding,
   1666                lane_len_in_hex + 1,
   1667                padding);
   1668 
   1669         // TODO: In case of an error, all tests print out as many elements as
   1670         //       there are lanes in the output or input vectors. This way
   1671         //       the viewer can read all the values that were needed for the
   1672         //       operation but the output contains also unnecessary values.
   1673         //       These prints can be improved according to the arguments
   1674         //       passed to test functions.
   1675         //       This output for the 'Across' category has the required
   1676         //       modifications.
   1677         for (unsigned lane = 0; lane < vn_lane_count; lane++) {
   1678           unsigned results_index =
   1679               (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
   1680           unsigned input_index_n =
   1681               (inputs_n_length - vn_lane_count + n + 1 + lane) %
   1682               inputs_n_length;
   1683 
   1684           Td expect = 0;
   1685           if ((vn_lane_count - 1) == lane) {
   1686             // This is the last lane to be printed, ie. the least-significant
   1687             // lane, so use the expected value; any other lane should be zero.
   1688             unsigned expected_index = n * vd_lane_count;
   1689             expect = expected[expected_index];
   1690           }
   1691           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   1692                  results[results_index] != expect ? '*' : ' ',
   1693                  lane_len_in_hex,
   1694                  static_cast<uint64_t>(inputs_n[input_index_n]),
   1695                  lane_len_in_hex,
   1696                  static_cast<uint64_t>(results[results_index]),
   1697                  lane_len_in_hex,
   1698                  static_cast<uint64_t>(expect));
   1699         }
   1700       }
   1701     }
   1702     VIXL_ASSERT(d == expected_length);
   1703     if (error_count > kErrorReportLimit) {
   1704       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1705     }
   1706     VIXL_CHECK(error_count == 0);
   1707   }
   1708   delete[] results;
   1709 }
   1710 
   1711 
   1712 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
   1713 
   1714 // TODO: Iterate over inputs_d once the traces file is split.
   1715 
   1716 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
   1717                                uintptr_t inputs_d,
   1718                                uintptr_t inputs_n,
   1719                                unsigned inputs_n_length,
   1720                                uintptr_t inputs_m,
   1721                                unsigned inputs_m_length,
   1722                                uintptr_t results,
   1723                                VectorFormat vd_form,
   1724                                VectorFormat vn_form,
   1725                                VectorFormat vm_form) {
   1726   VIXL_ASSERT(vd_form != kFormatUndefined);
   1727   VIXL_ASSERT(vn_form != kFormatUndefined);
   1728   VIXL_ASSERT(vm_form != kFormatUndefined);
   1729 
   1730   SETUP();
   1731   START();
   1732 
   1733   // Roll up the loop to keep the code size down.
   1734   Label loop_n, loop_m;
   1735 
   1736   Register out = x0;
   1737   Register inputs_n_base = x1;
   1738   Register inputs_m_base = x2;
   1739   Register inputs_d_base = x3;
   1740   Register inputs_n_last_16bytes = x4;
   1741   Register inputs_m_last_16bytes = x5;
   1742   Register index_n = x6;
   1743   Register index_m = x7;
   1744 
   1745   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1746   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1747   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1748 
   1749   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1750   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1751   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1752   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1753   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1754 
   1755   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
   1756   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
   1757   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
   1758   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
   1759   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
   1760 
   1761 
   1762   // Always load and store 128 bits regardless of the format.
   1763   VRegister vd = v0.V16B();
   1764   VRegister vn = v1.V16B();
   1765   VRegister vm = v2.V16B();
   1766   VRegister vntmp = v3.V16B();
   1767   VRegister vmtmp = v4.V16B();
   1768   VRegister vres = v5.V16B();
   1769 
   1770   // These will have the correct format for calling the 'helper'.
   1771   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   1772   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
   1773   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   1774 
   1775   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   1776   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   1777   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
   1778 
   1779   __ Mov(out, results);
   1780 
   1781   __ Mov(inputs_d_base, inputs_d);
   1782 
   1783   __ Mov(inputs_n_base, inputs_n);
   1784   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
   1785   __ Mov(inputs_m_base, inputs_m);
   1786   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
   1787 
   1788   __ Ldr(vd, MemOperand(inputs_d_base));
   1789   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   1790   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
   1791 
   1792   __ Mov(index_n, 0);
   1793   __ Bind(&loop_n);
   1794 
   1795   __ Ldr(vntmp_single,
   1796          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   1797   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   1798 
   1799   __ Mov(index_m, 0);
   1800   __ Bind(&loop_m);
   1801 
   1802   __ Ldr(vmtmp_single,
   1803          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
   1804   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
   1805 
   1806   __ Mov(vres, vd);
   1807   {
   1808     SingleEmissionCheckScope guard(&masm);
   1809     (masm.*helper)(vres_helper, vn_helper, vm_helper);
   1810   }
   1811   __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   1812 
   1813   __ Add(index_m, index_m, 1);
   1814   __ Cmp(index_m, inputs_m_length);
   1815   __ B(lo, &loop_m);
   1816 
   1817   __ Add(index_n, index_n, 1);
   1818   __ Cmp(index_n, inputs_n_length);
   1819   __ B(lo, &loop_n);
   1820 
   1821   END();
   1822   RUN();
   1823   TEARDOWN();
   1824 }
   1825 
   1826 
   1827 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   1828 // arrays of rawbit representation of input values. This ensures that
   1829 // exact bit comparisons can be performed.
   1830 template <typename Td, typename Tn, typename Tm>
   1831 static void Test2OpNEON(const char* name,
   1832                         Test2OpNEONHelper_t helper,
   1833                         const Td inputs_d[],
   1834                         const Tn inputs_n[],
   1835                         unsigned inputs_n_length,
   1836                         const Tm inputs_m[],
   1837                         unsigned inputs_m_length,
   1838                         const Td expected[],
   1839                         unsigned expected_length,
   1840                         VectorFormat vd_form,
   1841                         VectorFormat vn_form,
   1842                         VectorFormat vm_form) {
   1843   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
   1844 
   1845   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
   1846 
   1847   const unsigned results_length = inputs_n_length * inputs_m_length;
   1848   Td* results = new Td[results_length * vd_lane_count];
   1849   const unsigned lane_bit = sizeof(Td) * 8;
   1850   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
   1851 
   1852   Test2OpNEON_Helper(helper,
   1853                      reinterpret_cast<uintptr_t>(inputs_d),
   1854                      reinterpret_cast<uintptr_t>(inputs_n),
   1855                      inputs_n_length,
   1856                      reinterpret_cast<uintptr_t>(inputs_m),
   1857                      inputs_m_length,
   1858                      reinterpret_cast<uintptr_t>(results),
   1859                      vd_form,
   1860                      vn_form,
   1861                      vm_form);
   1862 
   1863   if (Test::generate_test_trace()) {
   1864     // Print the results.
   1865     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   1866     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   1867       printf(" ");
   1868       // Output a separate result for each element of the result vector.
   1869       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1870         unsigned index = lane + (iteration * vd_lane_count);
   1871         printf(" 0x%0*" PRIx64 ",",
   1872                lane_len_in_hex,
   1873                static_cast<uint64_t>(results[index]));
   1874       }
   1875       printf("\n");
   1876     }
   1877 
   1878     printf("};\n");
   1879     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   1880            name,
   1881            results_length);
   1882   } else {
   1883     // Check the results.
   1884     VIXL_CHECK(expected_length == results_length);
   1885     unsigned error_count = 0;
   1886     unsigned d = 0;
   1887     const char* padding = "                    ";
   1888     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   1889     for (unsigned n = 0; n < inputs_n_length; n++) {
   1890       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
   1891         bool error_in_vector = false;
   1892 
   1893         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1894           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   1895                                   (m * vd_lane_count) + lane;
   1896 
   1897           if (results[output_index] != expected[output_index]) {
   1898             error_in_vector = true;
   1899             break;
   1900           }
   1901         }
   1902 
   1903         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   1904           printf("%s\n", name);
   1905           printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
   1906                  lane_len_in_hex + 1,
   1907                  padding,
   1908                  lane_len_in_hex + 1,
   1909                  padding,
   1910                  lane_len_in_hex + 1,
   1911                  padding,
   1912                  lane_len_in_hex + 1,
   1913                  padding);
   1914 
   1915           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   1916             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   1917                                     (m * vd_lane_count) + lane;
   1918             unsigned input_index_n =
   1919                 (inputs_n_length - vd_lane_count + n + 1 + lane) %
   1920                 inputs_n_length;
   1921             unsigned input_index_m =
   1922                 (inputs_m_length - vd_lane_count + m + 1 + lane) %
   1923                 inputs_m_length;
   1924 
   1925             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
   1926                    " "
   1927                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   1928                    results[output_index] != expected[output_index] ? '*' : ' ',
   1929                    lane_len_in_hex,
   1930                    static_cast<uint64_t>(inputs_d[lane]),
   1931                    lane_len_in_hex,
   1932                    static_cast<uint64_t>(inputs_n[input_index_n]),
   1933                    lane_len_in_hex,
   1934                    static_cast<uint64_t>(inputs_m[input_index_m]),
   1935                    lane_len_in_hex,
   1936                    static_cast<uint64_t>(results[output_index]),
   1937                    lane_len_in_hex,
   1938                    static_cast<uint64_t>(expected[output_index]));
   1939           }
   1940         }
   1941       }
   1942     }
   1943     VIXL_ASSERT(d == expected_length);
   1944     if (error_count > kErrorReportLimit) {
   1945       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   1946     }
   1947     VIXL_CHECK(error_count == 0);
   1948   }
   1949   delete[] results;
   1950 }
   1951 
   1952 
   1953 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
   1954 
   1955 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
   1956                                      uintptr_t inputs_d,
   1957                                      uintptr_t inputs_n,
   1958                                      unsigned inputs_n_length,
   1959                                      uintptr_t inputs_m,
   1960                                      unsigned inputs_m_length,
   1961                                      const int indices[],
   1962                                      unsigned indices_length,
   1963                                      uintptr_t results,
   1964                                      VectorFormat vd_form,
   1965                                      VectorFormat vn_form,
   1966                                      VectorFormat vm_form) {
   1967   VIXL_ASSERT(vd_form != kFormatUndefined);
   1968   VIXL_ASSERT(vn_form != kFormatUndefined);
   1969   VIXL_ASSERT(vm_form != kFormatUndefined);
   1970 
   1971   SETUP();
   1972   START();
   1973 
   1974   // Roll up the loop to keep the code size down.
   1975   Label loop_n, loop_m;
   1976 
   1977   Register out = x0;
   1978   Register inputs_n_base = x1;
   1979   Register inputs_m_base = x2;
   1980   Register inputs_d_base = x3;
   1981   Register inputs_n_last_16bytes = x4;
   1982   Register inputs_m_last_16bytes = x5;
   1983   Register index_n = x6;
   1984   Register index_m = x7;
   1985 
   1986   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   1987   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   1988   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   1989 
   1990   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   1991   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   1992   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   1993   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   1994   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   1995 
   1996   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
   1997   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
   1998   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
   1999   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
   2000   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
   2001 
   2002 
   2003   // Always load and store 128 bits regardless of the format.
   2004   VRegister vd = v0.V16B();
   2005   VRegister vn = v1.V16B();
   2006   VRegister vm = v2.V16B();
   2007   VRegister vntmp = v3.V16B();
   2008   VRegister vmtmp = v4.V16B();
   2009   VRegister vres = v5.V16B();
   2010 
   2011   // These will have the correct format for calling the 'helper'.
   2012   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   2013   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
   2014   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   2015 
   2016   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   2017   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   2018   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
   2019 
   2020   __ Mov(out, results);
   2021 
   2022   __ Mov(inputs_d_base, inputs_d);
   2023 
   2024   __ Mov(inputs_n_base, inputs_n);
   2025   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
   2026   __ Mov(inputs_m_base, inputs_m);
   2027   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
   2028 
   2029   __ Ldr(vd, MemOperand(inputs_d_base));
   2030   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   2031   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
   2032 
   2033   __ Mov(index_n, 0);
   2034   __ Bind(&loop_n);
   2035 
   2036   __ Ldr(vntmp_single,
   2037          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   2038   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   2039 
   2040   __ Mov(index_m, 0);
   2041   __ Bind(&loop_m);
   2042 
   2043   __ Ldr(vmtmp_single,
   2044          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
   2045   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
   2046 
   2047   __ Mov(vres, vd);
   2048   {
   2049     for (unsigned i = 0; i < indices_length; i++) {
   2050       {
   2051         SingleEmissionCheckScope guard(&masm);
   2052         (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
   2053       }
   2054       __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   2055     }
   2056   }
   2057 
   2058   __ Add(index_m, index_m, 1);
   2059   __ Cmp(index_m, inputs_m_length);
   2060   __ B(lo, &loop_m);
   2061 
   2062   __ Add(index_n, index_n, 1);
   2063   __ Cmp(index_n, inputs_n_length);
   2064   __ B(lo, &loop_n);
   2065 
   2066   END();
   2067   RUN();
   2068   TEARDOWN();
   2069 }
   2070 
   2071 
   2072 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   2073 // arrays of rawbit representation of input values. This ensures that
   2074 // exact bit comparisons can be performed.
   2075 template <typename Td, typename Tn, typename Tm>
   2076 static void TestByElementNEON(const char* name,
   2077                               TestByElementNEONHelper_t helper,
   2078                               const Td inputs_d[],
   2079                               const Tn inputs_n[],
   2080                               unsigned inputs_n_length,
   2081                               const Tm inputs_m[],
   2082                               unsigned inputs_m_length,
   2083                               const int indices[],
   2084                               unsigned indices_length,
   2085                               const Td expected[],
   2086                               unsigned expected_length,
   2087                               VectorFormat vd_form,
   2088                               VectorFormat vn_form,
   2089                               VectorFormat vm_form) {
   2090   VIXL_ASSERT(inputs_n_length > 0);
   2091   VIXL_ASSERT(inputs_m_length > 0);
   2092   VIXL_ASSERT(indices_length > 0);
   2093 
   2094   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
   2095 
   2096   const unsigned results_length =
   2097       inputs_n_length * inputs_m_length * indices_length;
   2098   Td* results = new Td[results_length * vd_lane_count];
   2099   const unsigned lane_bit = sizeof(Td) * 8;
   2100   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
   2101 
   2102   TestByElementNEON_Helper(helper,
   2103                            reinterpret_cast<uintptr_t>(inputs_d),
   2104                            reinterpret_cast<uintptr_t>(inputs_n),
   2105                            inputs_n_length,
   2106                            reinterpret_cast<uintptr_t>(inputs_m),
   2107                            inputs_m_length,
   2108                            indices,
   2109                            indices_length,
   2110                            reinterpret_cast<uintptr_t>(results),
   2111                            vd_form,
   2112                            vn_form,
   2113                            vm_form);
   2114 
   2115   if (Test::generate_test_trace()) {
   2116     // Print the results.
   2117     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2118     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2119       printf(" ");
   2120       // Output a separate result for each element of the result vector.
   2121       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2122         unsigned index = lane + (iteration * vd_lane_count);
   2123         printf(" 0x%0*" PRIx64 ",",
   2124                lane_len_in_hex,
   2125                static_cast<uint64_t>(results[index]));
   2126       }
   2127       printf("\n");
   2128     }
   2129 
   2130     printf("};\n");
   2131     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2132            name,
   2133            results_length);
   2134   } else {
   2135     // Check the results.
   2136     VIXL_CHECK(expected_length == results_length);
   2137     unsigned error_count = 0;
   2138     unsigned d = 0;
   2139     const char* padding = "                    ";
   2140     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2141     for (unsigned n = 0; n < inputs_n_length; n++) {
   2142       for (unsigned m = 0; m < inputs_m_length; m++) {
   2143         for (unsigned index = 0; index < indices_length; index++, d++) {
   2144           bool error_in_vector = false;
   2145 
   2146           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2147             unsigned output_index =
   2148                 (n * inputs_m_length * indices_length * vd_lane_count) +
   2149                 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
   2150                 lane;
   2151 
   2152             if (results[output_index] != expected[output_index]) {
   2153               error_in_vector = true;
   2154               break;
   2155             }
   2156           }
   2157 
   2158           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2159             printf("%s\n", name);
   2160             printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
   2161                    lane_len_in_hex + 1,
   2162                    padding,
   2163                    lane_len_in_hex + 1,
   2164                    padding,
   2165                    lane_len_in_hex + 1,
   2166                    padding,
   2167                    lane_len_in_hex + 1,
   2168                    padding);
   2169 
   2170             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2171               unsigned output_index =
   2172                   (n * inputs_m_length * indices_length * vd_lane_count) +
   2173                   (m * indices_length * vd_lane_count) +
   2174                   (index * vd_lane_count) + lane;
   2175               unsigned input_index_n =
   2176                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
   2177                   inputs_n_length;
   2178               unsigned input_index_m =
   2179                   (inputs_m_length - vd_lane_count + m + 1 + lane) %
   2180                   inputs_m_length;
   2181 
   2182               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
   2183                      " "
   2184                      "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2185                      results[output_index] != expected[output_index] ? '*'
   2186                                                                      : ' ',
   2187                      lane_len_in_hex,
   2188                      static_cast<uint64_t>(inputs_d[lane]),
   2189                      lane_len_in_hex,
   2190                      static_cast<uint64_t>(inputs_n[input_index_n]),
   2191                      lane_len_in_hex,
   2192                      static_cast<uint64_t>(inputs_m[input_index_m]),
   2193                      indices[index],
   2194                      lane_len_in_hex,
   2195                      static_cast<uint64_t>(results[output_index]),
   2196                      lane_len_in_hex,
   2197                      static_cast<uint64_t>(expected[output_index]));
   2198             }
   2199           }
   2200         }
   2201       }
   2202     }
   2203     VIXL_ASSERT(d == expected_length);
   2204     if (error_count > kErrorReportLimit) {
   2205       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2206     }
   2207     VIXL_CHECK(error_count == 0);
   2208   }
   2209   delete[] results;
   2210 }
   2211 
   2212 
   2213 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
   2214 
   2215 
   2216 template <typename Tm>
   2217 void Test2OpImmNEON_Helper(
   2218     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
   2219     uintptr_t inputs_n,
   2220     unsigned inputs_n_length,
   2221     const Tm inputs_m[],
   2222     unsigned inputs_m_length,
   2223     uintptr_t results,
   2224     VectorFormat vd_form,
   2225     VectorFormat vn_form) {
   2226   VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
   2227 
   2228   SETUP();
   2229   START();
   2230 
   2231   // Roll up the loop to keep the code size down.
   2232   Label loop_n;
   2233 
   2234   Register out = x0;
   2235   Register inputs_n_base = x1;
   2236   Register inputs_n_last_16bytes = x3;
   2237   Register index_n = x5;
   2238 
   2239   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   2240   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   2241   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2242 
   2243   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   2244   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2245   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2246   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   2247   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   2248 
   2249 
   2250   // These will be either a D- or a Q-register form, with a single lane
   2251   // (for use in scalar load and store operations).
   2252   VRegister vd = VRegister(0, vd_bits);
   2253   VRegister vn = v1.V16B();
   2254   VRegister vntmp = v3.V16B();
   2255 
   2256   // These will have the correct format for use when calling 'helper'.
   2257   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
   2258   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   2259 
   2260   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   2261   VRegister vntmp_single = VRegister(3, vn_lane_bits);
   2262 
   2263   __ Mov(out, results);
   2264 
   2265   __ Mov(inputs_n_base, inputs_n);
   2266   __ Mov(inputs_n_last_16bytes,
   2267          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
   2268 
   2269   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
   2270 
   2271   __ Mov(index_n, 0);
   2272   __ Bind(&loop_n);
   2273 
   2274   __ Ldr(vntmp_single,
   2275          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   2276   __ Ext(vn, vn, vntmp, vn_lane_bytes);
   2277 
   2278   // Set the destination to zero for tests such as '[r]shrn2'.
   2279   // TODO: Setting the destination to values other than zero might be a better
   2280   //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
   2281   __ Movi(vd.V16B(), 0);
   2282 
   2283   {
   2284     for (unsigned i = 0; i < inputs_m_length; i++) {
   2285       {
   2286         SingleEmissionCheckScope guard(&masm);
   2287         (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
   2288       }
   2289       __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   2290     }
   2291   }
   2292 
   2293   __ Add(index_n, index_n, 1);
   2294   __ Cmp(index_n, inputs_n_length);
   2295   __ B(lo, &loop_n);
   2296 
   2297   END();
   2298   RUN();
   2299   TEARDOWN();
   2300 }
   2301 
   2302 
   2303 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   2304 // arrays of rawbit representation of input values. This ensures that
   2305 // exact bit comparisons can be performed.
   2306 template <typename Td, typename Tn, typename Tm>
   2307 static void Test2OpImmNEON(
   2308     const char* name,
   2309     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
   2310     const Tn inputs_n[],
   2311     unsigned inputs_n_length,
   2312     const Tm inputs_m[],
   2313     unsigned inputs_m_length,
   2314     const Td expected[],
   2315     unsigned expected_length,
   2316     VectorFormat vd_form,
   2317     VectorFormat vn_form) {
   2318   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
   2319 
   2320   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2321   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2322   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2323 
   2324   const unsigned results_length = inputs_n_length * inputs_m_length;
   2325   Td* results = new Td[results_length * vd_lane_count];
   2326   const unsigned lane_bit = sizeof(Td) * 8;
   2327   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   2328 
   2329   Test2OpImmNEON_Helper(helper,
   2330                         reinterpret_cast<uintptr_t>(inputs_n),
   2331                         inputs_n_length,
   2332                         inputs_m,
   2333                         inputs_m_length,
   2334                         reinterpret_cast<uintptr_t>(results),
   2335                         vd_form,
   2336                         vn_form);
   2337 
   2338   if (Test::generate_test_trace()) {
   2339     // Print the results.
   2340     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2341     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2342       printf(" ");
   2343       // Output a separate result for each element of the result vector.
   2344       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2345         unsigned index = lane + (iteration * vd_lane_count);
   2346         printf(" 0x%0*" PRIx64 ",",
   2347                lane_len_in_hex,
   2348                static_cast<uint64_t>(results[index]));
   2349       }
   2350       printf("\n");
   2351     }
   2352 
   2353     printf("};\n");
   2354     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2355            name,
   2356            results_length);
   2357   } else {
   2358     // Check the results.
   2359     VIXL_CHECK(expected_length == results_length);
   2360     unsigned error_count = 0;
   2361     unsigned d = 0;
   2362     const char* padding = "                    ";
   2363     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2364     for (unsigned n = 0; n < inputs_n_length; n++) {
   2365       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
   2366         bool error_in_vector = false;
   2367 
   2368         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2369           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   2370                                   (m * vd_lane_count) + lane;
   2371 
   2372           if (results[output_index] != expected[output_index]) {
   2373             error_in_vector = true;
   2374             break;
   2375           }
   2376         }
   2377 
   2378         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2379           printf("%s\n", name);
   2380           printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
   2381                  lane_len_in_hex + 1,
   2382                  padding,
   2383                  lane_len_in_hex,
   2384                  padding,
   2385                  lane_len_in_hex + 1,
   2386                  padding);
   2387 
   2388           const unsigned first_index_n =
   2389               inputs_n_length - (16 / vn_lane_bytes) + n + 1;
   2390 
   2391           for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
   2392                lane++) {
   2393             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
   2394                                     (m * vd_lane_count) + lane;
   2395             unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
   2396             unsigned input_index_m = m;
   2397 
   2398             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
   2399                    " "
   2400                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2401                    results[output_index] != expected[output_index] ? '*' : ' ',
   2402                    lane_len_in_hex,
   2403                    static_cast<uint64_t>(inputs_n[input_index_n]),
   2404                    lane_len_in_hex,
   2405                    static_cast<uint64_t>(inputs_m[input_index_m]),
   2406                    lane_len_in_hex,
   2407                    static_cast<uint64_t>(results[output_index]),
   2408                    lane_len_in_hex,
   2409                    static_cast<uint64_t>(expected[output_index]));
   2410           }
   2411         }
   2412       }
   2413     }
   2414     VIXL_ASSERT(d == expected_length);
   2415     if (error_count > kErrorReportLimit) {
   2416       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2417     }
   2418     VIXL_CHECK(error_count == 0);
   2419   }
   2420   delete[] results;
   2421 }
   2422 
   2423 
   2424 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
   2425 
   2426 
   2427 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
   2428                                       uintptr_t inputs_d,
   2429                                       const int inputs_imm1[],
   2430                                       unsigned inputs_imm1_length,
   2431                                       uintptr_t inputs_n,
   2432                                       unsigned inputs_n_length,
   2433                                       const int inputs_imm2[],
   2434                                       unsigned inputs_imm2_length,
   2435                                       uintptr_t results,
   2436                                       VectorFormat vd_form,
   2437                                       VectorFormat vn_form) {
   2438   VIXL_ASSERT(vd_form != kFormatUndefined);
   2439   VIXL_ASSERT(vn_form != kFormatUndefined);
   2440 
   2441   SETUP();
   2442   START();
   2443 
   2444   // Roll up the loop to keep the code size down.
   2445   Label loop_n;
   2446 
   2447   Register out = x0;
   2448   Register inputs_d_base = x1;
   2449   Register inputs_n_base = x2;
   2450   Register inputs_n_last_vector = x4;
   2451   Register index_n = x6;
   2452 
   2453   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
   2454   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
   2455   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2456 
   2457   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
   2458   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
   2459   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
   2460   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
   2461   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
   2462 
   2463 
   2464   // These will be either a D- or a Q-register form, with a single lane
   2465   // (for use in scalar load and store operations).
   2466   VRegister vd = VRegister(0, vd_bits);
   2467   VRegister vn = VRegister(1, vn_bits);
   2468   VRegister vntmp = VRegister(4, vn_bits);
   2469   VRegister vres = VRegister(5, vn_bits);
   2470 
   2471   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
   2472   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
   2473 
   2474   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
   2475   VRegister vntmp_single = VRegister(4, vn_lane_bits);
   2476 
   2477   // Same registers for use in the 'ext' instructions.
   2478   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
   2479   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
   2480 
   2481   __ Mov(out, results);
   2482 
   2483   __ Mov(inputs_d_base, inputs_d);
   2484 
   2485   __ Mov(inputs_n_base, inputs_n);
   2486   __ Mov(inputs_n_last_vector,
   2487          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
   2488 
   2489   __ Ldr(vd, MemOperand(inputs_d_base));
   2490 
   2491   __ Ldr(vn, MemOperand(inputs_n_last_vector));
   2492 
   2493   __ Mov(index_n, 0);
   2494   __ Bind(&loop_n);
   2495 
   2496   __ Ldr(vntmp_single,
   2497          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
   2498   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
   2499 
   2500   {
   2501     EmissionCheckScope guard(&masm,
   2502                              kInstructionSize * inputs_imm1_length *
   2503                                  inputs_imm2_length * 3);
   2504     for (unsigned i = 0; i < inputs_imm1_length; i++) {
   2505       for (unsigned j = 0; j < inputs_imm2_length; j++) {
   2506         __ Mov(vres, vd);
   2507         (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
   2508         __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
   2509       }
   2510     }
   2511   }
   2512 
   2513   __ Add(index_n, index_n, 1);
   2514   __ Cmp(index_n, inputs_n_length);
   2515   __ B(lo, &loop_n);
   2516 
   2517   END();
   2518   RUN();
   2519   TEARDOWN();
   2520 }
   2521 
   2522 
   2523 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
   2524 // arrays of rawbit representation of input values. This ensures that
   2525 // exact bit comparisons can be performed.
   2526 template <typename Td, typename Tn>
   2527 static void TestOpImmOpImmNEON(const char* name,
   2528                                TestOpImmOpImmVdUpdateNEONHelper_t helper,
   2529                                const Td inputs_d[],
   2530                                const int inputs_imm1[],
   2531                                unsigned inputs_imm1_length,
   2532                                const Tn inputs_n[],
   2533                                unsigned inputs_n_length,
   2534                                const int inputs_imm2[],
   2535                                unsigned inputs_imm2_length,
   2536                                const Td expected[],
   2537                                unsigned expected_length,
   2538                                VectorFormat vd_form,
   2539                                VectorFormat vn_form) {
   2540   VIXL_ASSERT(inputs_n_length > 0);
   2541   VIXL_ASSERT(inputs_imm1_length > 0);
   2542   VIXL_ASSERT(inputs_imm2_length > 0);
   2543 
   2544   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
   2545 
   2546   const unsigned results_length =
   2547       inputs_n_length * inputs_imm1_length * inputs_imm2_length;
   2548 
   2549   Td* results = new Td[results_length * vd_lane_count];
   2550   const unsigned lane_bit = sizeof(Td) * 8;
   2551   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
   2552 
   2553   TestOpImmOpImmNEON_Helper(helper,
   2554                             reinterpret_cast<uintptr_t>(inputs_d),
   2555                             inputs_imm1,
   2556                             inputs_imm1_length,
   2557                             reinterpret_cast<uintptr_t>(inputs_n),
   2558                             inputs_n_length,
   2559                             inputs_imm2,
   2560                             inputs_imm2_length,
   2561                             reinterpret_cast<uintptr_t>(results),
   2562                             vd_form,
   2563                             vn_form);
   2564 
   2565   if (Test::generate_test_trace()) {
   2566     // Print the results.
   2567     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
   2568     for (unsigned iteration = 0; iteration < results_length; iteration++) {
   2569       printf(" ");
   2570       // Output a separate result for each element of the result vector.
   2571       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2572         unsigned index = lane + (iteration * vd_lane_count);
   2573         printf(" 0x%0*" PRIx64 ",",
   2574                lane_len_in_hex,
   2575                static_cast<uint64_t>(results[index]));
   2576       }
   2577       printf("\n");
   2578     }
   2579 
   2580     printf("};\n");
   2581     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
   2582            name,
   2583            results_length);
   2584   } else {
   2585     // Check the results.
   2586     VIXL_CHECK(expected_length == results_length);
   2587     unsigned error_count = 0;
   2588     unsigned counted_length = 0;
   2589     const char* padding = "                    ";
   2590     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
   2591     for (unsigned n = 0; n < inputs_n_length; n++) {
   2592       for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
   2593         for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
   2594           bool error_in_vector = false;
   2595 
   2596           counted_length++;
   2597 
   2598           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2599             unsigned output_index =
   2600                 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
   2601                 (imm1 * inputs_imm2_length * vd_lane_count) +
   2602                 (imm2 * vd_lane_count) + lane;
   2603 
   2604             if (results[output_index] != expected[output_index]) {
   2605               error_in_vector = true;
   2606               break;
   2607             }
   2608           }
   2609 
   2610           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
   2611             printf("%s\n", name);
   2612             printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
   2613                    lane_len_in_hex + 1,
   2614                    padding,
   2615                    lane_len_in_hex,
   2616                    padding,
   2617                    lane_len_in_hex + 1,
   2618                    padding,
   2619                    lane_len_in_hex,
   2620                    padding,
   2621                    lane_len_in_hex + 1,
   2622                    padding);
   2623 
   2624             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
   2625               unsigned output_index =
   2626                   (n * inputs_imm1_length * inputs_imm2_length *
   2627                    vd_lane_count) +
   2628                   (imm1 * inputs_imm2_length * vd_lane_count) +
   2629                   (imm2 * vd_lane_count) + lane;
   2630               unsigned input_index_n =
   2631                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
   2632                   inputs_n_length;
   2633               unsigned input_index_imm1 = imm1;
   2634               unsigned input_index_imm2 = imm2;
   2635 
   2636               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
   2637                      " "
   2638                      "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
   2639                      results[output_index] != expected[output_index] ? '*'
   2640                                                                      : ' ',
   2641                      lane_len_in_hex,
   2642                      static_cast<uint64_t>(inputs_d[lane]),
   2643                      lane_len_in_hex,
   2644                      static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
   2645                      lane_len_in_hex,
   2646                      static_cast<uint64_t>(inputs_n[input_index_n]),
   2647                      lane_len_in_hex,
   2648                      static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
   2649                      lane_len_in_hex,
   2650                      static_cast<uint64_t>(results[output_index]),
   2651                      lane_len_in_hex,
   2652                      static_cast<uint64_t>(expected[output_index]));
   2653             }
   2654           }
   2655         }
   2656       }
   2657     }
   2658     VIXL_ASSERT(counted_length == expected_length);
   2659     if (error_count > kErrorReportLimit) {
   2660       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
   2661     }
   2662     VIXL_CHECK(error_count == 0);
   2663   }
   2664   delete[] results;
   2665 }
   2666 
   2667 
   2668 // ==== Floating-point tests. ====
   2669 
   2670 
   // Helpers that expand into the standard floating-point tests, in both
   // double- and single-precision flavours.

   // Turn a token sequence into a string literal.
   #define STRINGIFY(token) #token

   // Call Test<kind> for 'insn'. The test is named "<insn>_<suffix>" and is
   // checked against kExpected_<insn>_<suffix> / kExpectedCount_<insn>_<suffix>.
   #define CALL_TEST_FP_HELPER(insn, suffix, kind, inputs) \
     Test##kind(STRINGIFY(insn) "_" STRINGIFY(suffix),     \
                &MacroAssembler::insn,                     \
                inputs,                                    \
                sizeof(inputs) / sizeof(inputs[0]),        \
                kExpected_##insn##_##suffix,               \
                kExpectedCount_##insn##_##suffix)

   // Define the '_d' test (fed from kInputDouble<set>) and the '_s' test (fed
   // from kInputFloat<set>) for 'insn'.
   #define DEFINE_TEST_FP(insn, kind, set)                   \
     TEST(insn##_d) {                                        \
       CALL_TEST_FP_HELPER(insn, d, kind, kInputDouble##set); \
     }                                                       \
     TEST(insn##_s) {                                        \
       CALL_TEST_FP_HELPER(insn, s, kind, kInputFloat##set); \
     }
   2690 
   2691 // TODO: Test with a newer version of valgrind.
   2692 //
   2693 // Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
   2694 // Therefore this test will be exiting though an ASSERT and thus leaking
   2695 // memory.
   2696 DEFINE_TEST_FP(fmadd, 3Op, Basic)
   2697 DEFINE_TEST_FP(fmsub, 3Op, Basic)
   2698 DEFINE_TEST_FP(fnmadd, 3Op, Basic)
   2699 DEFINE_TEST_FP(fnmsub, 3Op, Basic)
   2700 
   2701 DEFINE_TEST_FP(fadd, 2Op, Basic)
   2702 DEFINE_TEST_FP(fdiv, 2Op, Basic)
   2703 DEFINE_TEST_FP(fmax, 2Op, Basic)
   2704 DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
   2705 DEFINE_TEST_FP(fmin, 2Op, Basic)
   2706 DEFINE_TEST_FP(fminnm, 2Op, Basic)
   2707 DEFINE_TEST_FP(fmul, 2Op, Basic)
   2708 DEFINE_TEST_FP(fsub, 2Op, Basic)
   2709 DEFINE_TEST_FP(fnmul, 2Op, Basic)
   2710 
   2711 DEFINE_TEST_FP(fabs, 1Op, Basic)
   2712 DEFINE_TEST_FP(fmov, 1Op, Basic)
   2713 DEFINE_TEST_FP(fneg, 1Op, Basic)
   2714 DEFINE_TEST_FP(fsqrt, 1Op, Basic)
   2715 DEFINE_TEST_FP(frinta, 1Op, Conversions)
   2716 DEFINE_TEST_FP(frinti, 1Op, Conversions)
   2717 DEFINE_TEST_FP(frintm, 1Op, Conversions)
   2718 DEFINE_TEST_FP(frintn, 1Op, Conversions)
   2719 DEFINE_TEST_FP(frintp, 1Op, Conversions)
   2720 DEFINE_TEST_FP(frintx, 1Op, Conversions)
   2721 DEFINE_TEST_FP(frintz, 1Op, Conversions)
   2722 
   2723 TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
   2724 TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
   2725 TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
   2726 TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }
   2727 
   2728 TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
   2729 TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
   2730 
   2731 #define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)              \
   2732   TEST(mnemonic##_xd) {                                           \
   2733     CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \
   2734   }                                                               \
   2735   TEST(mnemonic##_xs) {                                           \
   2736     CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);  \
   2737   }                                                               \
   2738   TEST(mnemonic##_wd) {                                           \
   2739     CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
   2740   }                                                               \
   2741   TEST(mnemonic##_ws) {                                           \
   2742     CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);  \
   2743   }
   2744 
   2745 DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
   2746 DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
   2747 DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
   2748 DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
   2749 DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
   2750 DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
   2751 DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
   2752 DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
   2753 
   2754 // TODO: Scvtf-fixed-point
   2755 // TODO: Scvtf-integer
   2756 // TODO: Ucvtf-fixed-point
   2757 // TODO: Ucvtf-integer
   2758 
   2759 // TODO: Fccmp
   2760 // TODO: Fcsel
   2761 
   2762 
   2763 // ==== NEON Tests. ====
   2764 
   // Call Test1OpNEON for 'insn'. The test name and the kExpected_NEON_* data
   // are both keyed on "<insn>_<vd>".
   #define CALL_TEST_NEON_HELPER_1Op(insn, vd, vn, inputs_n) \
     Test1OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd),          \
                 &MacroAssembler::insn,                      \
                 inputs_n,                                   \
                 (sizeof(inputs_n) / sizeof(inputs_n[0])),   \
                 kExpected_NEON_##insn##_##vd,               \
                 kExpectedCount_NEON_##insn##_##vd,          \
                 kFormat##vd,                                \
                 kFormat##vn)
   2774 
   // Call Test1OpAcrossNEON for 'insn'. The test name and the kExpected_NEON_*
   // data are both keyed on "<insn>_<vd>_<vn>".
   #define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd, vn, inputs_n)           \
     Test1OpAcrossNEON(STRINGIFY(insn) "_" STRINGIFY(vd) "_" STRINGIFY(vn),  \
                       &MacroAssembler::insn,                                \
                       inputs_n,                                             \
                       (sizeof(inputs_n) / sizeof(inputs_n[0])),             \
                       kExpected_NEON_##insn##_##vd##_##vn,                  \
                       kExpectedCount_NEON_##insn##_##vd##_##vn,             \
                       kFormat##vd,                                          \
                       kFormat##vn)
   2785 
   // Call Test2OpNEON for 'insn'. The test name and the kExpected_NEON_* data
   // are both keyed on "<insn>_<vd>".
   #define CALL_TEST_NEON_HELPER_2Op(                       \
       insn, vd, vn, vm, inputs_d, inputs_n, inputs_m)      \
     Test2OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd),         \
                 &MacroAssembler::insn,                     \
                 inputs_d,                                  \
                 inputs_n,                                  \
                 (sizeof(inputs_n) / sizeof(inputs_n[0])),  \
                 inputs_m,                                  \
                 (sizeof(inputs_m) / sizeof(inputs_m[0])),  \
                 kExpected_NEON_##insn##_##vd,              \
                 kExpectedCount_NEON_##insn##_##vd,         \
                 kFormat##vd,                               \
                 kFormat##vn,                               \
                 kFormat##vm)
   2800 
   // Call Test2OpImmNEON for 'insn'. The test name and the kExpected_NEON_*
   // data are both keyed on "<insn>_<vd>_2OPIMM".
   #define CALL_TEST_NEON_HELPER_2OpImm(insn, vd, vn, inputs_n, inputs_m)  \
     Test2OpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd) "_2OPIMM",           \
                    &MacroAssembler::insn,                                 \
                    inputs_n,                                              \
                    (sizeof(inputs_n) / sizeof(inputs_n[0])),              \
                    inputs_m,                                              \
                    (sizeof(inputs_m) / sizeof(inputs_m[0])),              \
                    kExpected_NEON_##insn##_##vd##_2OPIMM,                 \
                    kExpectedCount_NEON_##insn##_##vd##_2OPIMM,            \
                    kFormat##vd,                                           \
                    kFormat##vn)
   2813 
   // Call TestByElementNEON for 'insn'. The test name and the kExpected_NEON_*
   // data are both keyed on "<insn>_<vd>_<vn>_<vm>".
   #define CALL_TEST_NEON_HELPER_ByElement(                                 \
       insn, vd, vn, vm, inputs_d, inputs_n, inputs_m, lane_indices)        \
     TestByElementNEON(                                                     \
         STRINGIFY(insn) "_" STRINGIFY(vd) "_" STRINGIFY(vn) "_" STRINGIFY( \
             vm),                                                           \
         &MacroAssembler::insn,                                             \
         inputs_d,                                                          \
         inputs_n,                                                          \
         (sizeof(inputs_n) / sizeof(inputs_n[0])),                          \
         inputs_m,                                                          \
         (sizeof(inputs_m) / sizeof(inputs_m[0])),                          \
         lane_indices,                                                      \
         (sizeof(lane_indices) / sizeof(lane_indices[0])),                  \
         kExpected_NEON_##insn##_##vd##_##vn##_##vm,                        \
         kExpectedCount_NEON_##insn##_##vd##_##vn##_##vm,                   \
         kFormat##vd,                                                       \
         kFormat##vn,                                                       \
         kFormat##vm)
   2832 
   // Call TestOpImmOpImmNEON. 'emit' is passed through as the helper to run;
   // the test name and the kExpected_NEON_* data are both keyed on
   // "<insn>_<vd>".
   #define CALL_TEST_NEON_HELPER_OpImmOpImm(                            \
       emit, insn, vd, vn, inputs_d, imms1, inputs_n, imms2)            \
     TestOpImmOpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd),              \
                        emit,                                           \
                        inputs_d,                                       \
                        imms1,                                          \
                        (sizeof(imms1) / sizeof(imms1[0])),             \
                        inputs_n,                                       \
                        (sizeof(inputs_n) / sizeof(inputs_n[0])),       \
                        imms2,                                          \
                        (sizeof(imms2) / sizeof(imms2[0])),             \
                        kExpected_NEON_##insn##_##vd,                   \
                        kExpectedCount_NEON_##insn##_##vd,              \
                        kFormat##vd,                                    \
                        kFormat##vn)
   2854 
   // One-operand test where the source and destination share a single format.
   #define CALL_TEST_NEON_HELPER_2SAME(insn, form, inputs) \
     CALL_TEST_NEON_HELPER_1Op(insn, form, form, inputs)

   // Define the 8B and 16B tests for 'insn', fed from kInput8bits<set>.
   #define DEFINE_TEST_NEON_2SAME_8B_16B(insn, set)              \
     TEST(insn##_8B) {                                           \
       CALL_TEST_NEON_HELPER_2SAME(insn, 8B, kInput8bits##set);  \
     }                                                           \
     TEST(insn##_16B) {                                          \
       CALL_TEST_NEON_HELPER_2SAME(insn, 16B, kInput8bits##set); \
     }

   // Define the 4H and 8H tests for 'insn', fed from kInput16bits<set>.
   #define DEFINE_TEST_NEON_2SAME_4H_8H(insn, set)               \
     TEST(insn##_4H) {                                           \
       CALL_TEST_NEON_HELPER_2SAME(insn, 4H, kInput16bits##set); \
     }                                                           \
     TEST(insn##_8H) {                                           \
       CALL_TEST_NEON_HELPER_2SAME(insn, 8H, kInput16bits##set); \
     }

   // Define the 2S and 4S tests for 'insn', fed from kInput32bits<set>.
   #define DEFINE_TEST_NEON_2SAME_2S_4S(insn, set)               \
     TEST(insn##_2S) {                                           \
       CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInput32bits##set); \
     }                                                           \
     TEST(insn##_4S) {                                           \
       CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInput32bits##set); \
     }

   // Byte and half-word formats only.
   #define DEFINE_TEST_NEON_2SAME_BH(insn, set) \
     DEFINE_TEST_NEON_2SAME_8B_16B(insn, set)   \
     DEFINE_TEST_NEON_2SAME_4H_8H(insn, set)

   // Every format except 2D.
   #define DEFINE_TEST_NEON_2SAME_NO2D(insn, set) \
     DEFINE_TEST_NEON_2SAME_BH(insn, set)         \
     DEFINE_TEST_NEON_2SAME_2S_4S(insn, set)

   // Every format, including 2D (fed from kInput64bits<set>).
   #define DEFINE_TEST_NEON_2SAME(insn, set)                     \
     DEFINE_TEST_NEON_2SAME_NO2D(insn, set)                      \
     TEST(insn##_2D) {                                           \
       CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##set); \
     }
   2895 #define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                  \
   2896   DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                     \
   2897   TEST(mnemonic##_2D) {                                             \
   2898     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
   2899   }
   2900 
   2901 #define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                  \
   2902   TEST(mnemonic##_2S) {                                             \
   2903     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);  \
   2904   }                                                                 \
   2905   TEST(mnemonic##_4S) {                                             \
   2906     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);  \
   2907   }                                                                 \
   2908   TEST(mnemonic##_2D) {                                             \
   2909     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
   2910   }
   2911 
   2912 #define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input)          \
   2913   TEST(mnemonic##_S) {                                             \
   2914     CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);  \
   2915   }                                                                \
   2916   TEST(mnemonic##_D) {                                             \
   2917     CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \
   2918   }
   2919 
   2920 #define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)          \
   2921   TEST(mnemonic##_B) {                                            \
   2922     CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \
   2923   }
   2924 #define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)           \
   2925   TEST(mnemonic##_H) {                                             \
   2926     CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
   2927   }
   2928 #define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)           \
   2929   TEST(mnemonic##_S) {                                             \
   2930     CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
   2931   }
   2932 #define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)           \
   2933   TEST(mnemonic##_D) {                                             \
   2934     CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
   2935   }
   2936 
   2937 #define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
   2938   DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)     \
   2939   DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)     \
   2940   DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)     \
   2941   DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
   2942 
   2943 #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
   2944   DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)        \
   2945   DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
   2946 
   2947 
   2948 #define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
   2949   CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
   2950 
   2951 #define DEFINE_TEST_NEON_ACROSS(mnemonic, input)                        \
   2952   TEST(mnemonic##_B_8B) {                                               \
   2953     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input);  \
   2954   }                                                                     \
   2955   TEST(mnemonic##_B_16B) {                                              \
   2956     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
   2957   }                                                                     \
   2958   TEST(mnemonic##_H_4H) {                                               \
   2959     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
   2960   }                                                                     \
   2961   TEST(mnemonic##_H_8H) {                                               \
   2962     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
   2963   }                                                                     \
   2964   TEST(mnemonic##_S_4S) {                                               \
   2965     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
   2966   }
   2967 
   2968 #define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input)                   \
   2969   TEST(mnemonic##_H_8B) {                                               \
   2970     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input);  \
   2971   }                                                                     \
   2972   TEST(mnemonic##_H_16B) {                                              \
   2973     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
   2974   }                                                                     \
   2975   TEST(mnemonic##_S_4H) {                                               \
   2976     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
   2977   }                                                                     \
   2978   TEST(mnemonic##_S_8H) {                                               \
   2979     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
   2980   }                                                                     \
   2981   TEST(mnemonic##_D_4S) {                                               \
   2982     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
   2983   }
   2984 
   2985 #define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input)                    \
   2986   TEST(mnemonic##_S_4S) {                                              \
   2987     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \
   2988   }
   2989 
   2990 #define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
   2991   CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
   2992 
   2993 #define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input)                    \
   2994   TEST(mnemonic##_4H) {                                                 \
   2995     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input);  \
   2996   }                                                                     \
   2997   TEST(mnemonic##_8H) {                                                 \
   2998     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
   2999   }                                                                     \
   3000   TEST(mnemonic##_2S) {                                                 \
   3001     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
   3002   }                                                                     \
   3003   TEST(mnemonic##_4S) {                                                 \
   3004     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
   3005   }                                                                     \
   3006   TEST(mnemonic##_1D) {                                                 \
   3007     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
   3008   }                                                                     \
   3009   TEST(mnemonic##_2D) {                                                 \
   3010     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
   3011   }
   3012 
   3013 #define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input)                      \
   3014   TEST(mnemonic##_8B) {                                                     \
   3015     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input);     \
   3016   }                                                                         \
   3017   TEST(mnemonic##_4H) {                                                     \
   3018     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input);     \
   3019   }                                                                         \
   3020   TEST(mnemonic##_2S) {                                                     \
   3021     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input);     \
   3022   }                                                                         \
   3023   TEST(mnemonic##2_16B) {                                                   \
   3024     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
   3025   }                                                                         \
   3026   TEST(mnemonic##2_8H) {                                                    \
   3027     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input);  \
   3028   }                                                                         \
   3029   TEST(mnemonic##2_4S) {                                                    \
   3030     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input);  \
   3031   }
   3032 
   3033 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input)                     \
   3034   TEST(mnemonic##_4S) {                                                     \
   3035     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input);    \
   3036   }                                                                         \
   3037   TEST(mnemonic##_2D) {                                                     \
   3038     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input);      \
   3039   }                                                                         \
   3040   TEST(mnemonic##2_4S) {                                                    \
   3041     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
   3042   }                                                                         \
   3043   TEST(mnemonic##2_2D) {                                                    \
   3044     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input);   \
   3045   }
   3046 
   3047 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input)                  \
   3048   TEST(mnemonic##_4H) {                                                    \
   3049     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input);     \
   3050   }                                                                        \
   3051   TEST(mnemonic##_2S) {                                                    \
   3052     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);    \
   3053   }                                                                        \
   3054   TEST(mnemonic##2_8H) {                                                   \
   3055     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input);  \
   3056   }                                                                        \
   3057   TEST(mnemonic##2_4S) {                                                   \
   3058     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
   3059   }
   3060 
   3061 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input)               \
   3062   TEST(mnemonic##_2S) {                                                    \
   3063     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);    \
   3064   }                                                                        \
   3065   TEST(mnemonic##2_4S) {                                                   \
   3066     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
   3067   }
   3068 
   3069 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input)         \
   3070   TEST(mnemonic##_B) {                                                \
   3071     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
   3072   }                                                                   \
   3073   TEST(mnemonic##_H) {                                                \
   3074     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
   3075   }                                                                   \
   3076   TEST(mnemonic##_S) {                                                \
   3077     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
   3078   }
   3079 
   3080 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input)           \
   3081   TEST(mnemonic##_S) {                                                 \
   3082     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input);  \
   3083   }                                                                    \
   3084   TEST(mnemonic##_D) {                                                 \
   3085     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
   3086   }
   3087 
   3088 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
   3089   {                                                                       \
   3090     CALL_TEST_NEON_HELPER_2Op(mnemonic,                                   \
   3091                               variant,                                    \
   3092                               variant,                                    \
   3093                               variant,                                    \
   3094                               input_d,                                    \
   3095                               input_nm,                                   \
   3096                               input_nm);                                  \
   3097   }
   3098 
   3099 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)     \
   3100   TEST(mnemonic##_8B) {                                    \
   3101     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
   3102                                 8B,                        \
   3103                                 kInput8bitsAccDestination, \
   3104                                 kInput8bits##input);       \
   3105   }                                                        \
   3106   TEST(mnemonic##_16B) {                                   \
   3107     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
   3108                                 16B,                       \
   3109                                 kInput8bitsAccDestination, \
   3110                                 kInput8bits##input);       \
   3111   }
   3112 
   3113 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)          \
   3114   TEST(mnemonic##_4H) {                                     \
   3115     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3116                                 4H,                         \
   3117                                 kInput16bitsAccDestination, \
   3118                                 kInput16bits##input);       \
   3119   }                                                         \
   3120   TEST(mnemonic##_8H) {                                     \
   3121     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3122                                 8H,                         \
   3123                                 kInput16bitsAccDestination, \
   3124                                 kInput16bits##input);       \
   3125   }                                                         \
   3126   TEST(mnemonic##_2S) {                                     \
   3127     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3128                                 2S,                         \
   3129                                 kInput32bitsAccDestination, \
   3130                                 kInput32bits##input);       \
   3131   }                                                         \
   3132   TEST(mnemonic##_4S) {                                     \
   3133     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3134                                 4S,                         \
   3135                                 kInput32bitsAccDestination, \
   3136                                 kInput32bits##input);       \
   3137   }
   3138 
   3139 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
   3140   DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)     \
   3141   DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
   3142 
   3143 #define DEFINE_TEST_NEON_3SAME(mnemonic, input)             \
   3144   DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input)              \
   3145   TEST(mnemonic##_2D) {                                     \
   3146     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3147                                 2D,                         \
   3148                                 kInput64bitsAccDestination, \
   3149                                 kInput64bits##input);       \
   3150   }
   3151 
   3152 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input)          \
   3153   TEST(mnemonic##_2S) {                                     \
   3154     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3155                                 2S,                         \
   3156                                 kInputFloatAccDestination,  \
   3157                                 kInputFloat##input);        \
   3158   }                                                         \
   3159   TEST(mnemonic##_4S) {                                     \
   3160     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3161                                 4S,                         \
   3162                                 kInputFloatAccDestination,  \
   3163                                 kInputFloat##input);        \
   3164   }                                                         \
   3165   TEST(mnemonic##_2D) {                                     \
   3166     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3167                                 2D,                         \
   3168                                 kInputDoubleAccDestination, \
   3169                                 kInputDouble##input);       \
   3170   }
   3171 
   3172 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)    \
   3173   TEST(mnemonic##_D) {                                      \
   3174     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3175                                 D,                          \
   3176                                 kInput64bitsAccDestination, \
   3177                                 kInput64bits##input);       \
   3178   }
   3179 
   3180 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input)   \
   3181   TEST(mnemonic##_H) {                                      \
   3182     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3183                                 H,                          \
   3184                                 kInput16bitsAccDestination, \
   3185                                 kInput16bits##input);       \
   3186   }                                                         \
   3187   TEST(mnemonic##_S) {                                      \
   3188     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3189                                 S,                          \
   3190                                 kInput32bitsAccDestination, \
   3191                                 kInput32bits##input);       \
   3192   }
   3193 
   3194 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input)      \
   3195   TEST(mnemonic##_B) {                                      \
   3196     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3197                                 B,                          \
   3198                                 kInput8bitsAccDestination,  \
   3199                                 kInput8bits##input);        \
   3200   }                                                         \
   3201   TEST(mnemonic##_H) {                                      \
   3202     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3203                                 H,                          \
   3204                                 kInput16bitsAccDestination, \
   3205                                 kInput16bits##input);       \
   3206   }                                                         \
   3207   TEST(mnemonic##_S) {                                      \
   3208     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3209                                 S,                          \
   3210                                 kInput32bitsAccDestination, \
   3211                                 kInput32bits##input);       \
   3212   }                                                         \
   3213   TEST(mnemonic##_D) {                                      \
   3214     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3215                                 D,                          \
   3216                                 kInput64bitsAccDestination, \
   3217                                 kInput64bits##input);       \
   3218   }
   3219 
   3220 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input)   \
   3221   TEST(mnemonic##_S) {                                      \
   3222     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3223                                 S,                          \
   3224                                 kInputFloatAccDestination,  \
   3225                                 kInputFloat##input);        \
   3226   }                                                         \
   3227   TEST(mnemonic##_D) {                                      \
   3228     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
   3229                                 D,                          \
   3230                                 kInputDoubleAccDestination, \
   3231                                 kInputDouble##input);       \
   3232   }
   3233 
   3234 #define CALL_TEST_NEON_HELPER_3DIFF(                             \
   3235     mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
   3236   {                                                              \
   3237     CALL_TEST_NEON_HELPER_2Op(mnemonic,                          \
   3238                               vdform,                            \
   3239                               vnform,                            \
   3240                               vmform,                            \
   3241                               input_d,                           \
   3242                               input_n,                           \
   3243                               input_m);                          \
   3244   }
   3245 
   3246 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)     \
   3247   TEST(mnemonic##_8H) {                                     \
   3248     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3249                                 8H,                         \
   3250                                 8B,                         \
   3251                                 8B,                         \
   3252                                 kInput16bitsAccDestination, \
   3253                                 kInput8bits##input,         \
   3254                                 kInput8bits##input);        \
   3255   }                                                         \
   3256   TEST(mnemonic##2_8H) {                                    \
   3257     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3258                                 8H,                         \
   3259                                 16B,                        \
   3260                                 16B,                        \
   3261                                 kInput16bitsAccDestination, \
   3262                                 kInput8bits##input,         \
   3263                                 kInput8bits##input);        \
   3264   }
   3265 
   3266 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)     \
   3267   TEST(mnemonic##_4S) {                                     \
   3268     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3269                                 4S,                         \
   3270                                 4H,                         \
   3271                                 4H,                         \
   3272                                 kInput32bitsAccDestination, \
   3273                                 kInput16bits##input,        \
   3274                                 kInput16bits##input);       \
   3275   }                                                         \
   3276   TEST(mnemonic##2_4S) {                                    \
   3277     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3278                                 4S,                         \
   3279                                 8H,                         \
   3280                                 8H,                         \
   3281                                 kInput32bitsAccDestination, \
   3282                                 kInput16bits##input,        \
   3283                                 kInput16bits##input);       \
   3284   }
   3285 
   3286 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)     \
   3287   TEST(mnemonic##_2D) {                                     \
   3288     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3289                                 2D,                         \
   3290                                 2S,                         \
   3291                                 2S,                         \
   3292                                 kInput64bitsAccDestination, \
   3293                                 kInput32bits##input,        \
   3294                                 kInput32bits##input);       \
   3295   }                                                         \
   3296   TEST(mnemonic##2_2D) {                                    \
   3297     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3298                                 2D,                         \
   3299                                 4S,                         \
   3300                                 4S,                         \
   3301                                 kInput64bitsAccDestination, \
   3302                                 kInput32bits##input,        \
   3303                                 kInput32bits##input);       \
   3304   }
   3305 
   3306 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
   3307   DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)       \
   3308   DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
   3309 
   3310 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
   3311   DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)    \
   3312   DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)    \
   3313   DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
   3314 
   3315 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
   3316   TEST(mnemonic##_S) {                                        \
   3317     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                     \
   3318                                 S,                            \
   3319                                 H,                            \
   3320                                 H,                            \
   3321                                 kInput32bitsAccDestination,   \
   3322                                 kInput16bits##input,          \
   3323                                 kInput16bits##input);         \
   3324   }
   3325 
   3326 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
   3327   TEST(mnemonic##_D) {                                        \
   3328     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                     \
   3329                                 D,                            \
   3330                                 S,                            \
   3331                                 S,                            \
   3332                                 kInput64bitsAccDestination,   \
   3333                                 kInput32bits##input,          \
   3334                                 kInput32bits##input);         \
   3335   }
   3336 
   3337 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
   3338   DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input)        \
   3339   DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
   3340 
   // Defines six TEST cases for a three-operand instruction whose last operand
   // uses a different format from the first two.
   //
   //   mnemonic : instruction name; the tests for `mnemonic##2` are emitted too.
   //   input    : suffix pasted onto kInput<N>bits to select the input arrays.
   //
   // Format triples passed to CALL_TEST_NEON_HELPER_3DIFF, in argument order:
   //   mnemonic    : (8H, 8H, 8B)   (4S, 4S, 4H)   (2D, 2D, 2S)
   //   mnemonic##2 : (8H, 8H, 16B)  (4S, 4S, 8H)   (2D, 2D, 4S)
   // The data arguments are kInput<W>bitsAccDestination and kInput<W>bits##input
   // at the width W of the first two formats, followed by kInput<W/2>bits##input
   // for the last format.
   // NOTE(review): CALL_TEST_NEON_HELPER_3DIFF is defined outside this chunk;
   // the argument order is presumably (vd, vn, vm, input_d, input_n, input_m) --
   // confirm against its definition.
   3341 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input)        \
   3342   TEST(mnemonic##_8H) {                                     \
   3343     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3344                                 8H,                         \
   3345                                 8H,                         \
   3346                                 8B,                         \
   3347                                 kInput16bitsAccDestination, \
   3348                                 kInput16bits##input,        \
   3349                                 kInput8bits##input);        \
   3350   }                                                         \
   3351   TEST(mnemonic##_4S) {                                     \
   3352     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3353                                 4S,                         \
   3354                                 4S,                         \
   3355                                 4H,                         \
   3356                                 kInput32bitsAccDestination, \
   3357                                 kInput32bits##input,        \
   3358                                 kInput16bits##input);       \
   3359   }                                                         \
   3360   TEST(mnemonic##_2D) {                                     \
   3361     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3362                                 2D,                         \
   3363                                 2D,                         \
   3364                                 2S,                         \
   3365                                 kInput64bitsAccDestination, \
   3366                                 kInput64bits##input,        \
   3367                                 kInput32bits##input);       \
   3368   }                                                         \
   3369   TEST(mnemonic##2_8H) {                                    \
   3370     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3371                                 8H,                         \
   3372                                 8H,                         \
   3373                                 16B,                        \
   3374                                 kInput16bitsAccDestination, \
   3375                                 kInput16bits##input,        \
   3376                                 kInput8bits##input);        \
   3377   }                                                         \
   3378   TEST(mnemonic##2_4S) {                                    \
   3379     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3380                                 4S,                         \
   3381                                 4S,                         \
   3382                                 8H,                         \
   3383                                 kInput32bitsAccDestination, \
   3384                                 kInput32bits##input,        \
   3385                                 kInput16bits##input);       \
   3386   }                                                         \
   3387   TEST(mnemonic##2_2D) {                                    \
   3388     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3389                                 2D,                         \
   3390                                 2D,                         \
   3391                                 4S,                         \
   3392                                 kInput64bitsAccDestination, \
   3393                                 kInput64bits##input,        \
   3394                                 kInput32bits##input);       \
   3395   }
   3396 
   // Defines six TEST cases for a three-operand instruction whose first format
   // has lanes half as wide as the other two.
   //
   //   mnemonic : instruction name; the tests for `mnemonic##2` are emitted too.
   //   input    : suffix pasted onto kInput<N>bits to select the input arrays.
   //
   // Format triples passed to CALL_TEST_NEON_HELPER_3DIFF, in argument order:
   //   mnemonic    : (8B, 8H, 8H)   (4H, 4S, 4S)   (2S, 2D, 2D)
   //   mnemonic##2 : (16B, 8H, 8H)  (8H, 4S, 4S)   (4S, 2D, 2D)
   // The data arguments are kInput<W>bitsAccDestination at the lane width W of
   // the first format, followed by kInput<2W>bits##input twice (the same array
   // is passed for both remaining data arguments).
   // NOTE(review): CALL_TEST_NEON_HELPER_3DIFF is defined outside this chunk;
   // the argument order is presumably (vd, vn, vm, input_d, input_n, input_m) --
   // confirm against its definition.
   3397 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input)      \
   3398   TEST(mnemonic##_8B) {                                     \
   3399     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3400                                 8B,                         \
   3401                                 8H,                         \
   3402                                 8H,                         \
   3403                                 kInput8bitsAccDestination,  \
   3404                                 kInput16bits##input,        \
   3405                                 kInput16bits##input);       \
   3406   }                                                         \
   3407   TEST(mnemonic##_4H) {                                     \
   3408     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3409                                 4H,                         \
   3410                                 4S,                         \
   3411                                 4S,                         \
   3412                                 kInput16bitsAccDestination, \
   3413                                 kInput32bits##input,        \
   3414                                 kInput32bits##input);       \
   3415   }                                                         \
   3416   TEST(mnemonic##_2S) {                                     \
   3417     CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
   3418                                 2S,                         \
   3419                                 2D,                         \
   3420                                 2D,                         \
   3421                                 kInput32bitsAccDestination, \
   3422                                 kInput64bits##input,        \
   3423                                 kInput64bits##input);       \
   3424   }                                                         \
   3425   TEST(mnemonic##2_16B) {                                   \
   3426     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3427                                 16B,                        \
   3428                                 8H,                         \
   3429                                 8H,                         \
   3430                                 kInput8bitsAccDestination,  \
   3431                                 kInput16bits##input,        \
   3432                                 kInput16bits##input);       \
   3433   }                                                         \
   3434   TEST(mnemonic##2_8H) {                                    \
   3435     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3436                                 8H,                         \
   3437                                 4S,                         \
   3438                                 4S,                         \
   3439                                 kInput16bitsAccDestination, \
   3440                                 kInput32bits##input,        \
   3441                                 kInput32bits##input);       \
   3442   }                                                         \
   3443   TEST(mnemonic##2_4S) {                                    \
   3444     CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
   3445                                 4S,                         \
   3446                                 2D,                         \
   3447                                 2D,                         \
   3448                                 kInput32bitsAccDestination, \
   3449                                 kInput64bits##input,        \
   3450                                 kInput64bits##input);       \
   3451   }
   3452 
   // Upper-case front end for CALL_TEST_NEON_HELPER_2OpImm: forwards all five
   // arguments unchanged and wraps the call in its own braced block.
   //
   //   mnemonic  : instruction under test.
   //   vdform    : format of the destination operand.
   //   vnform    : format of the source operand.
   //   input_n   : input array for the source operand.
   //   input_imm : array of immediates.
   //
   // Because the expansion is a compound statement, a trailing `;` at the use
   // site is optional (it only adds an empty statement).
   3453 #define CALL_TEST_NEON_HELPER_2OPIMM(             \
   3454     mnemonic, vdform, vnform, input_n, input_imm) \
   3455   {                                               \
   3456     CALL_TEST_NEON_HELPER_2OpImm(mnemonic,        \
   3457                                  vdform,          \
   3458                                  vnform,          \
   3459                                  input_n,         \
   3460                                  input_imm);      \
   3461   }
   3462 
   // Defines seven TEST cases named `mnemonic##_<fmt>_2OPIMM`, one for each of
   // the formats 8B, 16B, 4H, 8H, 2S, 4S and 2D. Destination and source use the
   // same format in every case.
   //
   //   mnemonic  : instruction under test.
   //   input     : suffix pasted onto kInput<W>bits for the source data.
   //   input_imm : suffix pasted onto kInput<W>bitsImm for the immediates.
   //
   // W (8/16/32/64) matches the lane width of the format being tested.
   3463 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm)   \
   3464   TEST(mnemonic##_8B_2OPIMM) {                                \
   3465     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                    \
   3466                                  8B,                          \
   3467                                  8B,                          \
   3468                                  kInput8bits##input,          \
   3469                                  kInput8bitsImm##input_imm);  \
   3470   }                                                           \
   3471   TEST(mnemonic##_16B_2OPIMM) {                               \
   3472     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                    \
   3473                                  16B,                         \
   3474                                  16B,                         \
   3475                                  kInput8bits##input,          \
   3476                                  kInput8bitsImm##input_imm);  \
   3477   }                                                           \
   3478   TEST(mnemonic##_4H_2OPIMM) {                                \
   3479     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                    \
   3480                                  4H,                          \
   3481                                  4H,                          \
   3482                                  kInput16bits##input,         \
   3483                                  kInput16bitsImm##input_imm); \
   3484   }                                                           \
   3485   TEST(mnemonic##_8H_2OPIMM) {                                \
   3486     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                    \
   3487                                  8H,                          \
   3488                                  8H,                          \
   3489                                  kInput16bits##input,         \
   3490                                  kInput16bitsImm##input_imm); \
   3491   }                                                           \
   3492   TEST(mnemonic##_2S_2OPIMM) {                                \
   3493     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                    \
   3494                                  2S,                          \
   3495                                  2S,                          \
   3496                                  kInput32bits##input,         \
   3497                                  kInput32bitsImm##input_imm); \
   3498   }                                                           \
   3499   TEST(mnemonic##_4S_2OPIMM) {                                \
   3500     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                    \
   3501                                  4S,                          \
   3502                                  4S,                          \
   3503                                  kInput32bits##input,         \
   3504                                  kInput32bitsImm##input_imm); \
   3505   }                                                           \
   3506   TEST(mnemonic##_2D_2OPIMM) {                                \
   3507     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                    \
   3508                                  2D,                          \
   3509                                  2D,                          \
   3510                                  kInput64bits##input,         \
   3511                                  kInput64bitsImm##input_imm); \
   3512   }
   3513 
   // Defines seven TEST cases named `mnemonic##_<fmt>_2OPIMM` in which the
   // destination is a vector format (8B, 16B, 4H, 8H, 2S, 4S, 2D) and the source
   // is the scalar format of the same lane width (B, H, S or D).
   //
   //   mnemonic  : instruction under test.
   //   input     : suffix pasted onto kInput<W>bits for the source data.
   //   input_imm : suffix pasted onto kInput<W>bitsImm for the immediates.
   //
   // The generated test names are identical to those of DEFINE_TEST_NEON_2OPIMM.
   // NOTE(review): presumably the two macros therefore cannot both be
   // instantiated for the same mnemonic -- confirm against the TEST macro.
   3514 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
   3515   TEST(mnemonic##_8B_2OPIMM) {                                   \
   3516     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3517                                  8B,                             \
   3518                                  B,                              \
   3519                                  kInput8bits##input,             \
   3520                                  kInput8bitsImm##input_imm);     \
   3521   }                                                              \
   3522   TEST(mnemonic##_16B_2OPIMM) {                                  \
   3523     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3524                                  16B,                            \
   3525                                  B,                              \
   3526                                  kInput8bits##input,             \
   3527                                  kInput8bitsImm##input_imm);     \
   3528   }                                                              \
   3529   TEST(mnemonic##_4H_2OPIMM) {                                   \
   3530     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3531                                  4H,                             \
   3532                                  H,                              \
   3533                                  kInput16bits##input,            \
   3534                                  kInput16bitsImm##input_imm);    \
   3535   }                                                              \
   3536   TEST(mnemonic##_8H_2OPIMM) {                                   \
   3537     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3538                                  8H,                             \
   3539                                  H,                              \
   3540                                  kInput16bits##input,            \
   3541                                  kInput16bitsImm##input_imm);    \
   3542   }                                                              \
   3543   TEST(mnemonic##_2S_2OPIMM) {                                   \
   3544     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3545                                  2S,                             \
   3546                                  S,                              \
   3547                                  kInput32bits##input,            \
   3548                                  kInput32bitsImm##input_imm);    \
   3549   }                                                              \
   3550   TEST(mnemonic##_4S_2OPIMM) {                                   \
   3551     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3552                                  4S,                             \
   3553                                  S,                              \
   3554                                  kInput32bits##input,            \
   3555                                  kInput32bitsImm##input_imm);    \
   3556   }                                                              \
   3557   TEST(mnemonic##_2D_2OPIMM) {                                   \
   3558     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3559                                  2D,                             \
   3560                                  D,                              \
   3561                                  kInput64bits##input,            \
   3562                                  kInput64bitsImm##input_imm);    \
   3563   }
   3564 
   // Defines six TEST cases in which the destination lanes are half as wide as
   // the source lanes.
   //
   //   mnemonic  : instruction name; tests for `mnemonic##2` are emitted too.
   //   input     : suffix pasted onto kInput<2W>bits (source-width data).
   //   input_imm : suffix pasted onto kInput<W>bitsImm (destination-width
   //               immediates).
   //
   // (destination, source) format pairs:
   //   mnemonic    : (8B, 8H)   (4H, 4S)  (2S, 2D)
   //   mnemonic##2 : (16B, 8H)  (8H, 4S)  (4S, 2D)
   3565 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
   3566   TEST(mnemonic##_8B_2OPIMM) {                                     \
   3567     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
   3568                                  8B,                               \
   3569                                  8H,                               \
   3570                                  kInput16bits##input,              \
   3571                                  kInput8bitsImm##input_imm);       \
   3572   }                                                                \
   3573   TEST(mnemonic##_4H_2OPIMM) {                                     \
   3574     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
   3575                                  4H,                               \
   3576                                  4S,                               \
   3577                                  kInput32bits##input,              \
   3578                                  kInput16bitsImm##input_imm);      \
   3579   }                                                                \
   3580   TEST(mnemonic##_2S_2OPIMM) {                                     \
   3581     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
   3582                                  2S,                               \
   3583                                  2D,                               \
   3584                                  kInput64bits##input,              \
   3585                                  kInput32bitsImm##input_imm);      \
   3586   }                                                                \
   3587   TEST(mnemonic##2_16B_2OPIMM) {                                   \
   3588     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
   3589                                  16B,                              \
   3590                                  8H,                               \
   3591                                  kInput16bits##input,              \
   3592                                  kInput8bitsImm##input_imm);       \
   3593   }                                                                \
   3594   TEST(mnemonic##2_8H_2OPIMM) {                                    \
   3595     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
   3596                                  8H,                               \
   3597                                  4S,                               \
   3598                                  kInput32bits##input,              \
   3599                                  kInput16bitsImm##input_imm);      \
   3600   }                                                                \
   3601   TEST(mnemonic##2_4S_2OPIMM) {                                    \
   3602     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
   3603                                  4S,                               \
   3604                                  2D,                               \
   3605                                  kInput64bits##input,              \
   3606                                  kInput32bitsImm##input_imm);      \
   3607   }
   3608 
   // Scalar counterpart of the narrowing two-operand-plus-immediate tests:
   // defines `mnemonic##_B_2OPIMM`, `_H_2OPIMM` and `_S_2OPIMM` with
   // (destination, source) formats (B, H), (H, S) and (S, D).
   //
   //   mnemonic  : instruction under test.
   //   input     : suffix pasted onto kInput<2W>bits (source-width data).
   //   input_imm : suffix pasted onto kInput<W>bitsImm (destination-width
   //               immediates).
   3609 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
   3610   TEST(mnemonic##_B_2OPIMM) {                                             \
   3611     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
   3612                                  B,                                       \
   3613                                  H,                                       \
   3614                                  kInput16bits##input,                     \
   3615                                  kInput8bitsImm##input_imm);              \
   3616   }                                                                       \
   3617   TEST(mnemonic##_H_2OPIMM) {                                             \
   3618     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
   3619                                  H,                                       \
   3620                                  S,                                       \
   3621                                  kInput32bits##input,                     \
   3622                                  kInput16bitsImm##input_imm);             \
   3623   }                                                                       \
   3624   TEST(mnemonic##_S_2OPIMM) {                                             \
   3625     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
   3626                                  S,                                       \
   3627                                  D,                                       \
   3628                                  kInput64bits##input,                     \
   3629                                  kInput32bitsImm##input_imm);             \
   3630   }
   3631 
   // Defines `mnemonic##_2S_2OPIMM`, `_4S_2OPIMM` and `_2D_2OPIMM` using the
   // floating-point input arrays. Destination and source share the same format.
   //
   //   mnemonic  : instruction under test.
   //   input     : suffix pasted onto kInputFloat (4S) / kInputDouble (2D).
   //   input_imm : suffix pasted onto kInputDoubleImm; the double-typed immediate
   //               array is used for every format, including the S-lane ones.
   //
   // Every helper invocation is terminated with `;`, matching the sibling
   // DEFINE_TEST_NEON_2OPIMM* macros (the helper expands to a braced block, so
   // the `;` is only an empty statement).
   //
   // NOTE(review): the 2S case pastes the literal `Basic` instead of `input`, so
   // it always uses kInputFloatBasic regardless of the `input` argument. This is
   // left untouched here because changing it could alter the generated test
   // inputs -- confirm whether it is intentional.
   3632 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
   3633   TEST(mnemonic##_2S_2OPIMM) {                                        \
   3634     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3635                                  2S,                                  \
   3636                                  2S,                                  \
   3637                                  kInputFloat##Basic,                  \
   3638                                  kInputDoubleImm##input_imm);         \
   3639   }                                                                   \
   3640   TEST(mnemonic##_4S_2OPIMM) {                                        \
   3641     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3642                                  4S,                                  \
   3643                                  4S,                                  \
   3644                                  kInputFloat##input,                  \
   3645                                  kInputDoubleImm##input_imm);         \
   3646   }                                                                   \
   3647   TEST(mnemonic##_2D_2OPIMM) {                                        \
   3648     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3649                                  2D,                                  \
   3650                                  2D,                                  \
   3651                                  kInputDouble##input,                 \
   3652                                  kInputDoubleImm##input_imm);         \
   3653   }
   3654 
   // Defines `mnemonic##_2S_2OPIMM`, `_4S_2OPIMM` and `_2D_2OPIMM` with
   // floating-point source data and integer immediate arrays. Destination and
   // source share the same format.
   //
   //   mnemonic  : instruction under test.
   //   input     : suffix pasted onto kInputFloat (4S) / kInputDouble (2D).
   //   input_imm : suffix pasted onto kInput32bitsImm (2S, 4S) or
   //               kInput64bitsImm (2D).
   //
   // Every helper invocation is terminated with `;`, matching the sibling
   // DEFINE_TEST_NEON_2OPIMM* macros (the helper expands to a braced block, so
   // the `;` is only an empty statement).
   //
   // NOTE(review): the 2S case pastes the literal `Basic` instead of `input`, so
   // it always uses kInputFloatBasic regardless of the `input` argument. This is
   // left untouched here because changing it could alter the generated test
   // inputs -- confirm whether it is intentional.
   3655 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
   3656   TEST(mnemonic##_2S_2OPIMM) {                                 \
   3657     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3658                                  2S,                           \
   3659                                  2S,                           \
   3660                                  kInputFloat##Basic,           \
   3661                                  kInput32bitsImm##input_imm);  \
   3662   }                                                            \
   3663   TEST(mnemonic##_4S_2OPIMM) {                                 \
   3664     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3665                                  4S,                           \
   3666                                  4S,                           \
   3667                                  kInputFloat##input,           \
   3668                                  kInput32bitsImm##input_imm);  \
   3669   }                                                            \
   3670   TEST(mnemonic##_2D_2OPIMM) {                                 \
   3671     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3672                                  2D,                           \
   3673                                  2D,                           \
   3674                                  kInputDouble##input,          \
   3675                                  kInput64bitsImm##input_imm);  \
   3676   }
   3677 
   // Scalar variant of DEFINE_TEST_NEON_2OPIMM_FP: defines `mnemonic##_S_2OPIMM`
   // and `mnemonic##_D_2OPIMM` with floating-point source data and integer
   // immediate arrays. Destination and source share the same format.
   //
   //   mnemonic  : instruction under test.
   //   input     : suffix pasted onto kInputDouble for the D case.
   //   input_imm : suffix pasted onto kInput32bitsImm (S) / kInput64bitsImm (D).
   //
   // Every helper invocation is terminated with `;`, matching the sibling
   // DEFINE_TEST_NEON_2OPIMM* macros (the helper expands to a braced block, so
   // the `;` is only an empty statement).
   //
   // NOTE(review): the S case pastes the literal `Basic` instead of `input`, so
   // it always uses kInputFloatBasic regardless of the `input` argument. This is
   // left untouched here because changing it could alter the generated test
   // inputs -- confirm whether it is intentional.
   3678 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
   3679   TEST(mnemonic##_S_2OPIMM) {                                         \
   3680     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3681                                  S,                                   \
   3682                                  S,                                   \
   3683                                  kInputFloat##Basic,                  \
   3684                                  kInput32bitsImm##input_imm);         \
   3685   }                                                                   \
   3686   TEST(mnemonic##_D_2OPIMM) {                                         \
   3687     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3688                                  D,                                   \
   3689                                  D,                                   \
   3690                                  kInputDouble##input,                 \
   3691                                  kInput64bitsImm##input_imm);         \
   3692   }
   3693 
   // Subset of DEFINE_TEST_NEON_2OPIMM restricted to the 2S, 4S and 2D formats:
   // defines `mnemonic##_2S_2OPIMM`, `_4S_2OPIMM` and `_2D_2OPIMM` using the
   // integer input arrays kInput<W>bits##input and kInput<W>bitsImm##input_imm,
   // where W is 32 for the S-lane formats and 64 for 2D.
   3694 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
   3695   TEST(mnemonic##_2S_2OPIMM) {                                 \
   3696     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3697                                  2S,                           \
   3698                                  2S,                           \
   3699                                  kInput32bits##input,          \
   3700                                  kInput32bitsImm##input_imm);  \
   3701   }                                                            \
   3702   TEST(mnemonic##_4S_2OPIMM) {                                 \
   3703     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3704                                  4S,                           \
   3705                                  4S,                           \
   3706                                  kInput32bits##input,          \
   3707                                  kInput32bitsImm##input_imm);  \
   3708   }                                                            \
   3709   TEST(mnemonic##_2D_2OPIMM) {                                 \
   3710     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
   3711                                  2D,                           \
   3712                                  2D,                           \
   3713                                  kInput64bits##input,          \
   3714                                  kInput64bitsImm##input_imm);  \
   3715   }
   3716 
   // Defines the single test `mnemonic##_D_2OPIMM`: D format for both operands,
   // with kInput64bits##input as source data and kInput64bitsImm##input_imm as
   // the immediates.
   3717 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
   3718   TEST(mnemonic##_D_2OPIMM) {                                        \
   3719     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                           \
   3720                                  D,                                  \
   3721                                  D,                                  \
   3722                                  kInput64bits##input,                \
   3723                                  kInput64bitsImm##input_imm);        \
   3724   }
   3725 
   // Defines `mnemonic##_S_2OPIMM` (S format for both operands, 32-bit integer
   // data and immediates) and then expands DEFINE_TEST_NEON_2OPIMM_SCALAR_D with
   // the same arguments to add the D-format test.
   3726 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
   3727   TEST(mnemonic##_S_2OPIMM) {                                         \
   3728     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
   3729                                  S,                                   \
   3730                                  S,                                   \
   3731                                  kInput32bits##input,                 \
   3732                                  kInput32bitsImm##input_imm);         \
   3733   }                                                                   \
   3734   DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
   3735 
   // Defines the single test `mnemonic##_D_2OPIMM`: D format for both operands,
   // with kInputDouble##input as source data and kInputDoubleImm##input_imm as
   // the immediates.
   3736 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
   3737   TEST(mnemonic##_D_2OPIMM) {                                           \
   3738     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                              \
   3739                                  D,                                     \
   3740                                  D,                                     \
   3741                                  kInputDouble##input,                   \
   3742                                  kInputDoubleImm##input_imm);           \
   3743   }
   3744 
   // Defines `mnemonic##_S_2OPIMM` (S format for both operands, source data from
   // kInputFloat##input) and then expands DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D
   // with the same arguments to add the D-format test. Both tests take their
   // immediates from kInputDoubleImm##input_imm.
   3745 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
   3746   TEST(mnemonic##_S_2OPIMM) {                                            \
   3747     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                               \
   3748                                  S,                                      \
   3749                                  S,                                      \
   3750                                  kInputFloat##input,                     \
   3751                                  kInputDoubleImm##input_imm);            \
   3752   }                                                                      \
   3753   DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
   3754 
   // Defines `mnemonic##_B_2OPIMM` and `mnemonic##_H_2OPIMM` (same scalar format
   // for both operands, 8-bit and 16-bit integer data and immediates) and then
   // expands DEFINE_TEST_NEON_2OPIMM_SCALAR_SD with the same arguments, which
   // adds the S- and D-format tests.
   3755 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
   3756   TEST(mnemonic##_B_2OPIMM) {                                      \
   3757     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
   3758                                  B,                                \
   3759                                  B,                                \
   3760                                  kInput8bits##input,               \
   3761                                  kInput8bitsImm##input_imm);       \
   3762   }                                                                \
   3763   TEST(mnemonic##_H_2OPIMM) {                                      \
   3764     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
   3765                                  H,                                \
   3766                                  H,                                \
   3767                                  kInput16bits##input,              \
   3768                                  kInput16bitsImm##input_imm);      \
   3769   }                                                                \
   3770   DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
   3771 
   // Defines six TEST cases in which the destination lanes are twice as wide as
   // the source lanes.
   //
   //   mnemonic  : instruction name; tests for `mnemonic##2` are emitted too.
   //   input     : suffix pasted onto kInput<W>bits (source-width data).
   //   input_imm : suffix pasted onto kInput<W>bitsImm (source-width immediates).
   //
   // (destination, source) format pairs:
   //   mnemonic    : (8H, 8B)   (4S, 4H)  (2D, 2S)
   //   mnemonic##2 : (8H, 16B)  (4S, 8H)  (2D, 4S)
   // Test names carry the destination format, e.g. `mnemonic##_8H_2OPIMM`.
   3772 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
   3773   TEST(mnemonic##_8H_2OPIMM) {                                   \
   3774     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3775                                  8H,                             \
   3776                                  8B,                             \
   3777                                  kInput8bits##input,             \
   3778                                  kInput8bitsImm##input_imm);     \
   3779   }                                                              \
   3780   TEST(mnemonic##_4S_2OPIMM) {                                   \
   3781     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3782                                  4S,                             \
   3783                                  4H,                             \
   3784                                  kInput16bits##input,            \
   3785                                  kInput16bitsImm##input_imm);    \
   3786   }                                                              \
   3787   TEST(mnemonic##_2D_2OPIMM) {                                   \
   3788     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
   3789                                  2D,                             \
   3790                                  2S,                             \
   3791                                  kInput32bits##input,            \
   3792                                  kInput32bitsImm##input_imm);    \
   3793   }                                                              \
   3794   TEST(mnemonic##2_8H_2OPIMM) {                                  \
   3795     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
   3796                                  8H,                             \
   3797                                  16B,                            \
   3798                                  kInput8bits##input,             \
   3799                                  kInput8bitsImm##input_imm);     \
   3800   }                                                              \
   3801   TEST(mnemonic##2_4S_2OPIMM) {                                  \
   3802     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
   3803                                  4S,                             \
   3804                                  8H,                             \
   3805                                  kInput16bits##input,            \
   3806                                  kInput16bitsImm##input_imm);    \
   3807   }                                                              \
   3808   TEST(mnemonic##2_2D_2OPIMM) {                                  \
   3809     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
   3810                                  2D,                             \
   3811                                  4S,                             \
   3812                                  kInput32bits##input,            \
   3813                                  kInput32bitsImm##input_imm);    \
   3814   }
   3815 
   // Upper-case front end for CALL_TEST_NEON_HELPER_ByElement: forwards all
   // eight arguments unchanged and wraps the call in its own braced block.
   //
   //   mnemonic                  : instruction under test.
   //   vdform, vnform, vmform    : formats of the d, n and m operands.
   //   input_d, input_n, input_m : input arrays for the d, n and m operands.
   //   indices                   : array of element indices.
   //
   // Because the expansion is a compound statement, a trailing `;` at the use
   // site is optional (it only adds an empty statement).
   3816 #define CALL_TEST_NEON_HELPER_BYELEMENT(                                  \
   3817     mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
   3818   {                                                                       \
   3819     CALL_TEST_NEON_HELPER_ByElement(mnemonic,                             \
   3820                                     vdform,                               \
   3821                                     vnform,                               \
   3822                                     vmform,                               \
   3823                                     input_d,                              \
   3824                                     input_n,                              \
   3825                                     input_m,                              \
   3826                                     indices);                             \
   3827   }
   3828 
   // Defines four TEST cases for `mnemonic`, named <mnemonic>_4H_4H_H,
   // <mnemonic>_8H_8H_H, <mnemonic>_2S_2S_S and <mnemonic>_4S_4S_S. Each one
   // expands CALL_TEST_NEON_HELPER_BYELEMENT with the matching vd/vn/vm forms.
   // input_d, input_n and input_m are suffixes that get token-pasted onto
   // kInput16bits (H forms, paired with kInputHIndices) or kInput32bits
   // (S forms, paired with kInputSIndices).
   3829 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
   3830   TEST(mnemonic##_4H_4H_H) {                                            \
   3831     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
   3832                                     4H,                                 \
   3833                                     4H,                                 \
   3834                                     H,                                  \
   3835                                     kInput16bits##input_d,              \
   3836                                     kInput16bits##input_n,              \
   3837                                     kInput16bits##input_m,              \
   3838                                     kInputHIndices);                    \
   3839   }                                                                     \
   3840   TEST(mnemonic##_8H_8H_H) {                                            \
   3841     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
   3842                                     8H,                                 \
   3843                                     8H,                                 \
   3844                                     H,                                  \
   3845                                     kInput16bits##input_d,              \
   3846                                     kInput16bits##input_n,              \
   3847                                     kInput16bits##input_m,              \
   3848                                     kInputHIndices);                    \
   3849   }                                                                     \
   3850   TEST(mnemonic##_2S_2S_S) {                                            \
   3851     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
   3852                                     2S,                                 \
   3853                                     2S,                                 \
   3854                                     S,                                  \
   3855                                     kInput32bits##input_d,              \
   3856                                     kInput32bits##input_n,              \
   3857                                     kInput32bits##input_m,              \
   3858                                     kInputSIndices);                    \
   3859   }                                                                     \
   3860   TEST(mnemonic##_4S_4S_S) {                                            \
   3861     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
   3862                                     4S,                                 \
   3863                                     4S,                                 \
   3864                                     S,                                  \
   3865                                     kInput32bits##input_d,              \
   3866                                     kInput32bits##input_n,              \
   3867                                     kInput32bits##input_m,              \
   3868                                     kInputSIndices);                    \
   3869   }
   3870 
   // Scalar counterpart of the by-element generator: defines two TEST cases,
   // <mnemonic>_H_H_H and <mnemonic>_S_S_S. The H case pastes the input
   // suffixes onto kInput16bits and uses kInputHIndices; the S case pastes
   // them onto kInput32bits and uses kInputSIndices.
   3871 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
   3872   TEST(mnemonic##_H_H_H) {                                                     \
   3873     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
   3874                                     H,                                         \
   3875                                     H,                                         \
   3876                                     H,                                         \
   3877                                     kInput16bits##input_d,                     \
   3878                                     kInput16bits##input_n,                     \
   3879                                     kInput16bits##input_m,                     \
   3880                                     kInputHIndices);                           \
   3881   }                                                                            \
   3882   TEST(mnemonic##_S_S_S) {                                                     \
   3883     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
   3884                                     S,                                         \
   3885                                     S,                                         \
   3886                                     S,                                         \
   3887                                     kInput32bits##input_d,                     \
   3888                                     kInput32bits##input_n,                     \
   3889                                     kInput32bits##input_m,                     \
   3890                                     kInputSIndices);                           \
   3891   }
   3892 
   // Floating-point by-element generator: defines three TEST cases,
   // <mnemonic>_2S_2S_S, <mnemonic>_4S_4S_S and <mnemonic>_2D_2D_D. The S
   // cases paste the input suffixes onto kInputFloat and use kInputSIndices;
   // the D case pastes them onto kInputDouble and uses kInputDIndices.
   3893 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
   3894   TEST(mnemonic##_2S_2S_S) {                                               \
   3895     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3896                                     2S,                                    \
   3897                                     2S,                                    \
   3898                                     S,                                     \
   3899                                     kInputFloat##input_d,                  \
   3900                                     kInputFloat##input_n,                  \
   3901                                     kInputFloat##input_m,                  \
   3902                                     kInputSIndices);                       \
   3903   }                                                                        \
   3904   TEST(mnemonic##_4S_4S_S) {                                               \
   3905     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3906                                     4S,                                    \
   3907                                     4S,                                    \
   3908                                     S,                                     \
   3909                                     kInputFloat##input_d,                  \
   3910                                     kInputFloat##input_n,                  \
   3911                                     kInputFloat##input_m,                  \
   3912                                     kInputSIndices);                       \
   3913   }                                                                        \
   3914   TEST(mnemonic##_2D_2D_D) {                                               \
   3915     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
   3916                                     2D,                                    \
   3917                                     2D,                                    \
   3918                                     D,                                     \
   3919                                     kInputDouble##input_d,                 \
   3920                                     kInputDouble##input_n,                 \
   3921                                     kInputDouble##input_m,                 \
   3922                                     kInputDIndices);                       \
   3923   }
   3924 
   // Scalar floating-point by-element generator: defines <mnemonic>_S_S_S
   // (kInputFloat inputs, kInputSIndices) and <mnemonic>_D_D_D (kInputDouble
   // inputs, kInputDIndices). inp_d, inp_n and inp_m are the suffixes pasted
   // onto those input-array prefixes.
   3925 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
   3926   TEST(mnemonic##_S_S_S) {                                                  \
   3927     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
   3928                                     S,                                      \
   3929                                     S,                                      \
   3930                                     S,                                      \
   3931                                     kInputFloat##inp_d,                     \
   3932                                     kInputFloat##inp_n,                     \
   3933                                     kInputFloat##inp_m,                     \
   3934                                     kInputSIndices);                        \
   3935   }                                                                         \
   3936   TEST(mnemonic##_D_D_D) {                                                  \
   3937     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
   3938                                     D,                                      \
   3939                                     D,                                      \
   3940                                     D,                                      \
   3941                                     kInputDouble##inp_d,                    \
   3942                                     kInputDouble##inp_n,                    \
   3943                                     kInputDouble##inp_m,                    \
   3944                                     kInputDIndices);                        \
   3945   }
   3946 
   3947 
   // By-element generator for instructions whose first form is wider than the
   // other two. Defines four TEST cases:
   //   <mnemonic>_4S_4H_H   and <mnemonic>2_4S_8H_H
   //   <mnemonic>_2D_2S_S   and <mnemonic>2_2D_4S_S
   // The "2" cases paste a literal 2 onto the mnemonic both in the test name
   // and in the instruction passed to the helper. input_d is pasted onto the
   // wider prefix (kInput32bits for 4S, kInput64bits for 2D); input_n and
   // input_m are pasted onto the narrower one (kInput16bits or kInput32bits).
   3948 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
   3949   TEST(mnemonic##_4S_4H_H) {                                                 \
   3950     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
   3951                                     4S,                                      \
   3952                                     4H,                                      \
   3953                                     H,                                       \
   3954                                     kInput32bits##input_d,                   \
   3955                                     kInput16bits##input_n,                   \
   3956                                     kInput16bits##input_m,                   \
   3957                                     kInputHIndices);                         \
   3958   }                                                                          \
   3959   TEST(mnemonic##2_4S_8H_H) {                                                \
   3960     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
   3961                                     4S,                                      \
   3962                                     8H,                                      \
   3963                                     H,                                       \
   3964                                     kInput32bits##input_d,                   \
   3965                                     kInput16bits##input_n,                   \
   3966                                     kInput16bits##input_m,                   \
   3967                                     kInputHIndices);                         \
   3968   }                                                                          \
   3969   TEST(mnemonic##_2D_2S_S) {                                                 \
   3970     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
   3971                                     2D,                                      \
   3972                                     2S,                                      \
   3973                                     S,                                       \
   3974                                     kInput64bits##input_d,                   \
   3975                                     kInput32bits##input_n,                   \
   3976                                     kInput32bits##input_m,                   \
   3977                                     kInputSIndices);                         \
   3978   }                                                                          \
   3979   TEST(mnemonic##2_2D_4S_S) {                                                \
   3980     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
   3981                                     2D,                                      \
   3982                                     4S,                                      \
   3983                                     S,                                       \
   3984                                     kInput64bits##input_d,                   \
   3985                                     kInput32bits##input_n,                   \
   3986                                     kInput32bits##input_m,                   \
   3987                                     kInputSIndices);                         \
   3988   }
   3989 
   // Scalar version of the mixed-width by-element generator. Defines
   // <mnemonic>_S_H_H (input_d pasted onto kInput32bits, input_n/input_m onto
   // kInput16bits, kInputHIndices) and <mnemonic>_D_S_S (input_d pasted onto
   // kInput64bits, input_n/input_m onto kInput32bits, kInputSIndices).
   3990 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(            \
   3991     mnemonic, input_d, input_n, input_m)                   \
   3992   TEST(mnemonic##_S_H_H) {                                 \
   3993     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
   3994                                     S,                     \
   3995                                     H,                     \
   3996                                     H,                     \
   3997                                     kInput32bits##input_d, \
   3998                                     kInput16bits##input_n, \
   3999                                     kInput16bits##input_m, \
   4000                                     kInputHIndices);       \
   4001   }                                                        \
   4002   TEST(mnemonic##_D_S_S) {                                 \
   4003     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
   4004                                     D,                     \
   4005                                     S,                     \
   4006                                     S,                     \
   4007                                     kInput64bits##input_d, \
   4008                                     kInput32bits##input_n, \
   4009                                     kInput32bits##input_m, \
   4010                                     kInputSIndices);       \
   4011   }
   4012 
   4013 
   // Adapter around CALL_TEST_NEON_HELPER_OpImmOpImm. It passes
   // &MacroAssembler::mnemonic followed by the bare mnemonic, then `variant`
   // twice (presumably as the destination and source forms -- confirm against
   // CALL_TEST_NEON_HELPER_OpImmOpImm), then input_d, input_imm1, input_n and
   // input_imm2 in that order. The call is wrapped in a braced block.
   4014 #define CALL_TEST_NEON_HELPER_2OP2IMM(                           \
   4015     mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
   4016   {                                                              \
   4017     CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic,  \
   4018                                      mnemonic,                   \
   4019                                      variant,                    \
   4020                                      variant,                    \
   4021                                      input_d,                    \
   4022                                      input_imm1,                 \
   4023                                      input_n,                    \
   4024                                      input_imm2);                \
   4025   }
   4026 
   // Defines four TEST cases, <mnemonic>_B, _H, _S and _D, which expand
   // CALL_TEST_NEON_HELPER_2OP2IMM with the variants 16B, 8H, 4S and 2D
   // respectively. For each element size N in {8, 16, 32, 64}:
   //   input_d and input_n are pasted onto kInput<N>bits, and
   //   input_imm1 and input_imm2 are pasted onto kInput<N>bitsImm.
   4027 #define DEFINE_TEST_NEON_2OP2IMM(                               \
   4028     mnemonic, input_d, input_imm1, input_n, input_imm2)         \
   4029   TEST(mnemonic##_B) {                                          \
   4030     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
   4031                                   16B,                          \
   4032                                   kInput8bits##input_d,         \
   4033                                   kInput8bitsImm##input_imm1,   \
   4034                                   kInput8bits##input_n,         \
   4035                                   kInput8bitsImm##input_imm2);  \
   4036   }                                                             \
   4037   TEST(mnemonic##_H) {                                          \
   4038     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
   4039                                   8H,                           \
   4040                                   kInput16bits##input_d,        \
   4041                                   kInput16bitsImm##input_imm1,  \
   4042                                   kInput16bits##input_n,        \
   4043                                   kInput16bitsImm##input_imm2); \
   4044   }                                                             \
   4045   TEST(mnemonic##_S) {                                          \
   4046     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
   4047                                   4S,                           \
   4048                                   kInput32bits##input_d,        \
   4049                                   kInput32bitsImm##input_imm1,  \
   4050                                   kInput32bits##input_n,        \
   4051                                   kInput32bitsImm##input_imm2); \
   4052   }                                                             \
   4053   TEST(mnemonic##_D) {                                          \
   4054     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
   4055                                   2D,                           \
   4056                                   kInput64bits##input_d,        \
   4057                                   kInput64bitsImm##input_imm1,  \
   4058                                   kInput64bits##input_n,        \
   4059                                   kInput64bitsImm##input_imm2); \
   4060   }
   4061 
   4062 
   4063 // Advanced SIMD copy.
   // ins: Basic data for both operands and LaneCountFromZero for both
   // immediates (expands to ins_B, ins_H, ins_S and ins_D).
   // dup: Basic data with the LaneCountFromZero immediate set.
   4064 DEFINE_TEST_NEON_2OP2IMM(
   4065     ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
   4066 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
   4067 
   4068 
   4069 // Advanced SIMD scalar copy.
   // Scalar dup, instantiated with Basic data and the LaneCountFromZero
   // immediate set.
   4070 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
   4071 
   4072 
   4073 // Advanced SIMD three same.
   // Every instruction in this group is instantiated with the Basic input
   // set. The suffix on the generator macro (_NO2D, _HS, _FP, _8B_16B or none)
   // presumably restricts which arrangements get a test -- the generators are
   // defined earlier in the file; confirm there.
   4074 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
   4075 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
   4076 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
   4077 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
   4078 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
   4079 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
   4080 DEFINE_TEST_NEON_3SAME(cmge, Basic)
   4081 DEFINE_TEST_NEON_3SAME(sshl, Basic)
   4082 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
   4083 DEFINE_TEST_NEON_3SAME(srshl, Basic)
   4084 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
   4085 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
   4086 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
   4087 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
   4088 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
   4089 DEFINE_TEST_NEON_3SAME(add, Basic)
   4090 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
   4091 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
   4092 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
   4093 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
   4094 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
   4095 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
   4096 DEFINE_TEST_NEON_3SAME(addp, Basic)
   4097 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
   4098 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
   4099 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
   4100 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
   4101 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
   4102 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
   4103 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
   4104 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
   4105 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
   4106 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
   4107 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
   4108 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
   4109 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
   4110 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
   4111 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
   4112 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
   4113 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
   4114 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
   4115 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
   4116 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
   4117 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
   4118 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
   4119 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
   4120 DEFINE_TEST_NEON_3SAME(ushl, Basic)
   4121 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
   4122 DEFINE_TEST_NEON_3SAME(urshl, Basic)
   4123 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
   4124 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
   4125 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
   4126 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
   4127 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
   4128 DEFINE_TEST_NEON_3SAME(sub, Basic)
   4129 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
   4130 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
   4131 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
   4132 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
   4133 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
   4134 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
   4135 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
   4136 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
   4137 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
   4138 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
   4139 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
   4140 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
   4141 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
   4142 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
   4143 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
   4144 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
   4145 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
   4146 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
   4147 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
   4148 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
   4149 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
   4150 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
   4151 
   4152 
   4153 // Advanced SIMD scalar three same.
   // All entries use the Basic input set. Four generator flavours appear
   // here: _SCALAR, _SCALAR_D, _SCALAR_HS and _FP_SCALAR (all defined earlier
   // in the file).
   4154 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
   4155 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
   4156 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
   4157 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
   4158 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
   4159 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
   4160 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
   4161 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
   4162 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
   4163 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
   4164 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
   4165 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
   4166 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
   4167 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
   4168 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
   // NOTE(review): uqadd and uqsub use the _SCALAR_D generator, whereas their
   // signed counterparts sqadd and sqsub above use _SCALAR. Confirm that this
   // difference is intentional.
   4169 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
   4170 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
   4171 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
   4172 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
   4173 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
   4174 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
   4175 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
   4176 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
   4177 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
   4178 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
   4179 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
   4180 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
   4181 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
   4182 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
   4183 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
   4184 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
   4185 
   4186 
   4187 // Advanced SIMD three different.
   // All entries use the Basic input set. Generator flavours used in this
   // group: _LONG, _WIDE, _NARROW, _LONG_SD and _LONG_8H (all defined earlier
   // in the file).
   4188 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
   4189 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
   4190 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
   4191 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
   4192 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
   4193 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
   4194 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
   4195 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
   4196 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
   4197 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
   4198 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
   4199 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
   4200 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
   4201 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
   4202 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
   4203 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
   4204 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
   4205 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
   4206 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
   4207 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
   4208 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
   4209 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
   4210 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
   4211 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
   4212 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
   4213 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
   4214 
   4215 
   4216 // Advanced SIMD scalar three different.
   // The three saturating doubling multiplies, each instantiated through the
   // _SCALAR_LONG_SD generator with the Basic input set.
   4217 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
   4218 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
   4219 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
   4220 
   4221 
   4222 // Advanced SIMD scalar pairwise.
   // addp is written out by hand instead of going through a DEFINE_ generator:
   // it calls CALL_TEST_NEON_HELPER_2DIFF directly with the forms D and 2D
   // (presumably destination and source -- confirm against the helper) and
   // the kInput64bitsBasic input array.
   4223 TEST(addp_SCALAR) {
   4224   CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
   4225 }
   // The floating-point pairwise reductions all use the _2DIFF_FP_SCALAR_SD
   // generator with the Basic input set.
   4226 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
   4227 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
   4228 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
   4229 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
   4230 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
   4231 
   4232 
   4233 // Advanced SIMD shift by immediate.
   // Three immediate sets are used in this group:
   //   TypeWidth                - sshr/ssra/srshr/srsra, ushr/usra/urshr/ursra,
   //                              sri and all the narrowing shifts.
   //   TypeWidthFromZero        - shl, sli, sqshl, sqshlu, uqshl, sshll, ushll.
   //   TypeWidthFromZeroToWidth - the fixed-point conversions (scvtf/ucvtf
   //                              with FixedPointConversions inputs,
   //                              fcvtzs/fcvtzu with Conversions inputs).
   // Everything except the conversions uses the Basic input set.
   4234 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
   4235 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
   4236 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
   4237 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
   4238 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
   4239 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
   4240 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
   4241 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
   4242 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
   4243 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
   4244 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
   4245 DEFINE_TEST_NEON_2OPIMM_SD(scvtf,
   4246                            FixedPointConversions,
   4247                            TypeWidthFromZeroToWidth)
   4248 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
   4249 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
   4250 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
   4251 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
   4252 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
   4253 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
   4254 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
   4255 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
   4256 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
   4257 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
   4258 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
   4259 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
   4260 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
   4261 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
   4262 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf,
   4263                            FixedPointConversions,
   4264                            TypeWidthFromZeroToWidth)
   4265 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
   4266 
   4267 
   4268 // Advanced SIMD scalar shift by immediate.
   // Scalar forms of the shift-by-immediate group. The same three immediate
   // sets appear: TypeWidth, TypeWidthFromZero, and TypeWidthFromZeroToWidth
   // for the fixed-point conversions. Everything except the conversions uses
   // the Basic input set.
   4269 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
   4270 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
   4271 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
   4272 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
   4273 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
   4274 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
   4275 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
   4276 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
   4277 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf,
   4278                                   FixedPointConversions,
   4279                                   TypeWidthFromZeroToWidth)
   4280 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
   4281 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
   4282 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
   4283 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
   4284 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
   4285 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
   4286 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
   4287 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
   4288 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
   4289 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
   4290 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
   4291 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
   4292 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
   4293 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf,
   4294                                   FixedPointConversions,
   4295                                   TypeWidthFromZeroToWidth)
   4296 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
   4297 
   4298 
   4299 // Advanced SIMD two-register miscellaneous.
   // Input-set usage in this group: the rounding and conversion instructions
   // (frint*, fcvt*) use Conversions; everything else uses Basic. The
   // compare-against-zero forms (cm* and fcm*) pass the Zero immediate set and
   // shll passes the SHLL immediate set.
   4300 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
   4301 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
   4302 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
   4303 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
   4304 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
   4305 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
   4306 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
   4307 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
   4308 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
   4309 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
   4310 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
   4311 DEFINE_TEST_NEON_2SAME(abs, Basic)
   4312 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
   4313 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
   4314 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
   4315 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
   4316 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
   4317 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
   4318 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
   4319 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
   4320 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
   4321 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
   4322 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
   4323 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
   4324 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
   4325 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
   4326 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
   4327 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
   4328 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
   4329 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
   4330 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
   4331 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
   4332 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
   4333 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
   4334 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
   4335 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
   4336 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
   4337 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
   4338 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
   4339 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
   4340 DEFINE_TEST_NEON_2SAME(neg, Basic)
   4341 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
   4342 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
   4343 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
   4344 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
   4345 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
   4346 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
   4347 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
   4348 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
   4349 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
   4350 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
   4351 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
   4352 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
   4353 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
   4354 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
   4355 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
   4356 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
   4357 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
   4358 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
   4359 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
   4360 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
   4361 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
   4362 
   4363 
   4364 // Advanced SIMD scalar two-register miscellaneous.
        //
        // Scalar counterparts of the vector list above, again one test-defining
        // macro invocation per instruction (first argument: mnemonic; second:
        // `Basic` or `Conversions`; third, where present: `Zero`).
        // NOTE(review): the _SCALAR_D / _FP_SCALAR_SD / _SCALAR_NARROW suffixes
        // presumably restrict the scalar register sizes that are tested - confirm
        // against the macro definitions earlier in this file.
   4365 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
   4366 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
   4367 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
   4368 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
   4369 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
   4370 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
   4371 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
   4372 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
   4373 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
   4374 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
   4375 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
   4376 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
   4377 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
   4378 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
   4379 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
   4380 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
   4381 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
   4382 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
   4383 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
   4384 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
   4385 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
   4386 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
   4387 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
   4388 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
   4389 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
        // Scalar fcvtxn is the one test in this section written out by hand: it
        // calls the 2DIFF helper directly with the formats S and D and the
        // double-precision conversion inputs.
        // NOTE(review): presumably no DEFINE_TEST_* macro fits because only the
        // S-from-D form exists for the scalar instruction - confirm.
   4390 TEST(fcvtxn_SCALAR) {
   4391   CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
   4392 }
   4393 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
   4394 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
   4395 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
   4396 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
   4397 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
   4398 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
   4399 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
   4400 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
   4401 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
   4402 
   4403 
   4404 // Advanced SIMD across lanes.
        //
        // Three macro flavours are used: _ACROSS for the integer min/max/add
        // reductions, _ACROSS_LONG for saddlv/uaddlv, and _ACROSS_FP for the
        // floating-point min/max reductions. All of them use the `Basic` inputs.
        // NOTE(review): which lane arrangements each flavour expands to is decided
        // by the macro definitions earlier in this file, not visible here.
   4405 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
   4406 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
   4407 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
   4408 DEFINE_TEST_NEON_ACROSS(addv, Basic)
   4409 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
   4410 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
   4411 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
   4412 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
   4413 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
   4414 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
   4415 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
   4416 
   4417 
   4418 // Advanced SIMD permute.
        //
        // All six permute instructions (uzp1/2, trn1/2, zip1/2) go through the same
        // DEFINE_TEST_NEON_3SAME macro with the `Basic` inputs; the macro is defined
        // earlier in this file.
   4419 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
   4420 DEFINE_TEST_NEON_3SAME(trn1, Basic)
   4421 DEFINE_TEST_NEON_3SAME(zip1, Basic)
   4422 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
   4423 DEFINE_TEST_NEON_3SAME(trn2, Basic)
   4424 DEFINE_TEST_NEON_3SAME(zip2, Basic)
   4425 
   4426 
   4427 // Advanced SIMD vector x indexed element.
        //
        // Each invocation names the instruction followed by three input-set names
        // (all `Basic` here). _BYELEMENT is used for the same-width integer forms,
        // _BYELEMENT_DIFF for the widening (...l) forms and _FP_BYELEMENT for the
        // floating-point forms.
        // NOTE(review): the three input-set arguments presumably feed the
        // destination/accumulator, the vector operand and the indexed-element
        // operand respectively - confirm against the macro definitions earlier in
        // this file.
   4428 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
   4429 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
   4430 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
   4431 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
   4432 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
   4433 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
   4434 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
   4435 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
   4436 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
   4437 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
   4438 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
   4439 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
   4440 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
   4441 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
   4442 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
   4443 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
   4444 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
   4445 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
   4446 
   4447 
   4448 // Advanced SIMD scalar x indexed element.
        //
        // Scalar counterparts of the by-element tests above, using the *_SCALAR
        // variants of the same three macro families and the `Basic` inputs for all
        // three input-set arguments.
        // NOTE(review): only the saturating-doubling integer forms and the
        // floating-point forms appear here, presumably because those are the only
        // by-element instructions with a scalar encoding - confirm.
   4449 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
   4450 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
   4451 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
   4452 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
   4453 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
   4454 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
   4455 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
   4456 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
   4457 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
   4458 
   4459 // Rebind the `__` shorthand: drop whatever definition was in effect, then
        // make `__ Foo(...)` expand to `masm->Foo(...)`. The generator functions
        // below receive the assembler as a pointer named `masm`.
   4460 #undef __
   4461 #define __ masm->
   4462 
        // Everything up to the matching #endif is compiled only when the AArch64
        // simulator is included (VIXL_INCLUDE_SIMULATOR_AARCH64), ABI support is
        // available (VIXL_HAS_ABI_SUPPORT), the compiler reports C++11 or later,
        // and the compiler is clang or satisfies GCC_VERSION_OR_NEWER(4, 9, 1).
   4463 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) &&                 \
   4464     defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
   4465     (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
   4466 
   4467 // Assemble code that writes a 32-bit zero (wzr) to `target` and returns.
        // The host address of `target` is embedded in the code as an immediate.
        // `masm` is reset before and finalized after code generation; the returned
        // pointer is the start address of its buffer.
   4468 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
   4469   masm->Reset();
   4470 
   4471   const intptr_t destination = reinterpret_cast<intptr_t>(target);
   4472   UseScratchRegisterScope scratch_scope(masm);
   4473   Register address = scratch_scope.AcquireX();
   4474   masm->Mov(address, destination);
   4475   masm->Str(wzr, MemOperand(address));
   4476   masm->Ret();
   4477   masm->FinalizeCode();
   4478   return masm->GetBuffer()->GetStartAddress<Instruction*>();
   4479 }
   4480 
   4481 
   4482 // Assemble code that writes its `int32_t` argument to the host address
   4483 // `target`, which is embedded in the generated code as an immediate.
   4484 // The argument register is looked up through an `ABI` object rather than
   4485 // being hard-coded, here and in the generators that follow; generic
   4486 // (e.g. templated) client code is expected to locate arguments and return
   4487 // values the same way.
   4488 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
   4489   masm->Reset();
   4490 
   4491   ABI abi;
   4492   Register argument(
   4493       abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
   4494   const intptr_t destination = reinterpret_cast<intptr_t>(target);
   4495   UseScratchRegisterScope scratch_scope(masm);
   4496   Register address = scratch_scope.AcquireX();
   4497   masm->Mov(address, destination);
   4498   masm->Str(argument, MemOperand(address));
   4499   masm->Ret();
   4500 
   4501   masm->FinalizeCode();
   4502   return masm->GetBuffer()->GetStartAddress<Instruction*>();
   4503 }
   4504 
   4505 
   4506 // Assemble a minimal integer power function: int64_t in, int64_t out.
        // The exponent `pow` is fixed at generation time: the loop below runs in the
        // generator and emits one Mul per unit of `pow`, so the generated code is a
        // straight-line sequence with no branch.
   4507 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
   4508   masm->Reset();
   4509 
   4510   ABI abi;
   4511   Register base(
   4512       abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
   4513   Register answer(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
   4514   UseScratchRegisterScope scratch_scope(masm);
   4515   Register product = scratch_scope.AcquireX();
   4516 
   4517   // Start from 1 so that no multiply at all (pow == 0) yields 1.
   4518   masm->Mov(product, 1);
   4519   for (unsigned remaining = pow; remaining != 0; --remaining) {
   4520     masm->Mul(product, product, base);
   4521   }
   4522   masm->Mov(answer, product);
   4523   masm->Ret();
   4524 
   4525   masm->FinalizeCode();
   4526   return masm->GetBuffer()->GetStartAddress<Instruction*>();
   4527 }
   4528 
   4529 // Assemble code that adds a float, an int64_t and a double into a double.
        // Argument and return locations come from an `ABI` object, queried in the
        // order float, int64_t, double. The float argument is widened in place: the
        // Fcvt below writes to the D view of the register that holds it.
   4530 Instruction* GenerateSum(MacroAssembler* masm) {
   4531   masm->Reset();
   4532 
   4533   // Keep these queries in signature order (float, int64_t, double).
   4534   ABI abi;
   4535   FPRegister float_arg(
   4536       abi.GetNextParameterGenericOperand<float>().GetCPURegister());
   4537   Register int_arg(
   4538       abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
   4539   FPRegister double_arg(
   4540       abi.GetNextParameterGenericOperand<double>().GetCPURegister());
   4541   FPRegister sum(abi.GetReturnGenericOperand<double>().GetCPURegister());
   4542 
   4543   UseScratchRegisterScope scratch_scope(masm);
   4544   FPRegister accumulator = scratch_scope.AcquireD();
   4545   FPRegister widened_float = float_arg.D();
   4546   masm->Fcvt(widened_float, float_arg);
   4547   masm->Scvtf(accumulator, int_arg);
   4548   masm->Fadd(accumulator, accumulator, widened_float);
   4549   masm->Fadd(sum, accumulator, double_arg);
   4550   masm->Ret();
   4551 
   4552   masm->FinalizeCode();
   4553   return masm->GetBuffer()->GetStartAddress<Instruction*>();
   4554 }
   4555 
   4556 // Exercise `simulator->RunFrom` with each of the generators defined above.
   4557 TEST(RunFrom) {
   4558   SETUP();
   4559 
   4560   // No argument, no return value: the generated code overwrites `cell`.
   4561   int32_t cell = 0xbad;
   4562   Instruction* store_zero = GenerateStoreZero(&masm, &cell);
   4563   simulator->RunFrom(store_zero);
   4564   VIXL_CHECK(cell == 0);
   4565 
   4566   // One `int32_t` argument, no return value.
   4567   int32_t payload = 0xf00d;
   4568   Instruction* store_input = GenerateStoreInput(&masm, &cell);
   4569   simulator->RunFrom<void, int32_t>(store_input, payload);
   4570   VIXL_CHECK(cell == 0xf00d);
   4571 
   4572   // One `int64_t` argument and an `int64_t` return value.
   4573   const int64_t anything_pow_0 =
   4574       simulator->RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
   4575   VIXL_CHECK(anything_pow_0 == 1);
   4576   const int64_t value_pow_1 =
   4577       simulator->RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
   4578   VIXL_CHECK(value_pow_1 == 123);
   4579   const int64_t two_pow_10 =
   4580       simulator->RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
   4581   VIXL_CHECK(two_pow_10 == 1024);
   4582 
   4583   // Several arguments passed in registers (float, int64_t, double) with a
   4584   // `double` result.
   4585   Instruction* sum_code = GenerateSum(&masm);
   4586   double total =
   4587       simulator->RunFrom<double, float, int64_t, double>(sum_code, 1.0, 2, 3.0);
   4588   VIXL_CHECK(total == 6.0);
   4589 }
   4590 #endif
   4591 
   4592 
   4593 }  // namespace aarch64
   4594 }  // namespace vixl
   4595