Home | History | Annotate | Download | only in a64
      1 // Copyright 2015, ARM Limited
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #ifndef VIXL_A64_SIMULATOR_A64_H_
     28 #define VIXL_A64_SIMULATOR_A64_H_
     29 
     30 #include "vixl/globals.h"
     31 #include "vixl/utils.h"
     32 #include "vixl/a64/instructions-a64.h"
     33 #include "vixl/a64/assembler-a64.h"
     34 #include "vixl/a64/disasm-a64.h"
     35 #include "vixl/a64/instrument-a64.h"
     36 
     37 namespace vixl {
     38 
     39 // Debug instructions.
     40 //
     41 // VIXL's macro-assembler and simulator support a few pseudo instructions to
     42 // make debugging easier. These pseudo instructions do not exist on real
     43 // hardware.
     44 //
     45 // TODO: Provide controls to prevent the macro assembler from emitting
     46 // pseudo-instructions. This is important for ahead-of-time compilers, where the
     47 // macro assembler is built with USE_SIMULATOR but the code will eventually be
     48 // run on real hardware.
     49 //
     50 // TODO: Also consider allowing these pseudo-instructions to be disabled in the
      51 // simulator, so that users can check that the input is valid native code.
     52 // (This isn't possible in all cases. Printf won't work, for example.)
     53 //
     54 // Each debug pseudo instruction is represented by a HLT instruction. The HLT
     55 // immediate field is used to identify the type of debug pseudo instruction.
     56 
     57 enum DebugHltOpcodes {
     58   kUnreachableOpcode = 0xdeb0,
     59   kPrintfOpcode,
     60   kTraceOpcode,
     61   kLogOpcode,
     62   // Aliases.
     63   kDebugHltFirstOpcode = kUnreachableOpcode,
     64   kDebugHltLastOpcode = kLogOpcode
     65 };
     66 
     67 // Each pseudo instruction uses a custom encoding for additional arguments, as
     68 // described below.
     69 
     70 // Unreachable - kUnreachableOpcode
     71 //
     72 // Instruction which should never be executed. This is used as a guard in parts
     73 // of the code that should not be reachable, such as in data encoded inline in
     74 // the instructions.
     75 
     76 // Printf - kPrintfOpcode
     77 //  - arg_count: The number of arguments.
     78 //  - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields.
     79 //
     80 // Simulate a call to printf.
     81 //
     82 // Floating-point and integer arguments are passed in separate sets of registers
     83 // in AAPCS64 (even for varargs functions), so it is not possible to determine
     84 // the type of each argument without some information about the values that were
     85 // passed in. This information could be retrieved from the printf format string,
     86 // but the format string is not trivial to parse so we encode the relevant
     87 // information with the HLT instruction.
     88 //
     89 // Also, the following registers are populated (as if for a native A64 call):
     90 //    x0: The format string
     91 // x1-x7: Optional arguments, if type == CPURegister::kRegister
     92 // d0-d7: Optional arguments, if type == CPURegister::kFPRegister
     93 const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
     94 const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
     95 const unsigned kPrintfLength = 3 * kInstructionSize;
     96 
     97 const unsigned kPrintfMaxArgCount = 4;
     98 
     99 // The argument pattern is a set of two-bit-fields, each with one of the
    100 // following values:
    101 enum PrintfArgPattern {
    102   kPrintfArgW = 1,
    103   kPrintfArgX = 2,
    104   // There is no kPrintfArgS because floats are always converted to doubles in C
    105   // varargs calls.
    106   kPrintfArgD = 3
    107 };
    108 static const unsigned kPrintfArgPatternBits = 2;
    109 
    110 // Trace - kTraceOpcode
     111 //  - parameter: TraceParameters stored as a uint32_t
    112 //  - command: TraceCommand stored as a uint32_t
    113 //
    114 // Allow for trace management in the generated code. This enables or disables
    115 // automatic tracing of the specified information for every simulated
    116 // instruction.
    117 const unsigned kTraceParamsOffset = 1 * kInstructionSize;
    118 const unsigned kTraceCommandOffset = 2 * kInstructionSize;
    119 const unsigned kTraceLength = 3 * kInstructionSize;
    120 
    121 // Trace parameters.
    122 enum TraceParameters {
    123   LOG_DISASM     = 1 << 0,  // Log disassembly.
    124   LOG_REGS       = 1 << 1,  // Log general purpose registers.
    125   LOG_VREGS      = 1 << 2,  // Log NEON and floating-point registers.
    126   LOG_SYSREGS    = 1 << 3,  // Log the flags and system registers.
    127   LOG_WRITE      = 1 << 4,  // Log writes to memory.
    128 
    129   LOG_NONE       = 0,
    130   LOG_STATE      = LOG_REGS | LOG_VREGS | LOG_SYSREGS,
    131   LOG_ALL        = LOG_DISASM | LOG_STATE | LOG_WRITE
    132 };
    133 
    134 // Trace commands.
    135 enum TraceCommand {
    136   TRACE_ENABLE   = 1,
    137   TRACE_DISABLE  = 2
    138 };
    139 
    140 // Log - kLogOpcode
     141 //  - parameter: TraceParameters stored as a uint32_t
    142 //
    143 // Print the specified information once. This mechanism is separate from Trace.
    144 // In particular, _all_ of the specified registers are printed, rather than just
    145 // the registers that the instruction writes.
    146 //
    147 // Any combination of the TraceParameters values can be used, except that
    148 // LOG_DISASM is not supported for Log.
    149 const unsigned kLogParamsOffset = 1 * kInstructionSize;
    150 const unsigned kLogLength = 2 * kInstructionSize;
    151 
    152 
    153 // Assemble the specified IEEE-754 components into the target type and apply
    154 // appropriate rounding.
    155 //  sign:     0 = positive, 1 = negative
    156 //  exponent: Unbiased IEEE-754 exponent.
    157 //  mantissa: The mantissa of the input. The top bit (which is not encoded for
    158 //            normal IEEE-754 values) must not be omitted. This bit has the
    159 //            value 'pow(2, exponent)'.
    160 //
    161 // The input value is assumed to be a normalized value. That is, the input may
    162 // not be infinity or NaN. If the source value is subnormal, it must be
    163 // normalized before calling this function such that the highest set bit in the
    164 // mantissa has the value 'pow(2, exponent)'.
    165 //
    166 // Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
    167 // calling a templated FPRound.
    168 template <class T, int ebits, int mbits>
    169 T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,
    170                  FPRounding round_mode) {
    171   VIXL_ASSERT((sign == 0) || (sign == 1));
    172 
    173   // Only FPTieEven and FPRoundOdd rounding modes are implemented.
    174   VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
    175 
    176   // Rounding can promote subnormals to normals, and normals to infinities. For
    177   // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
    178   // encodable as a float, but rounding based on the low-order mantissa bits
    179   // could make it overflow. With ties-to-even rounding, this value would become
    180   // an infinity.
    181 
    182   // ---- Rounding Method ----
    183   //
    184   // The exponent is irrelevant in the rounding operation, so we treat the
    185   // lowest-order bit that will fit into the result ('onebit') as having
    186   // the value '1'. Similarly, the highest-order bit that won't fit into
    187   // the result ('halfbit') has the value '0.5'. The 'point' sits between
    188   // 'onebit' and 'halfbit':
    189   //
    190   //            These bits fit into the result.
    191   //               |---------------------|
    192   //  mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    193   //                                     ||
    194   //                                    / |
    195   //                                   /  halfbit
    196   //                               onebit
    197   //
    198   // For subnormal outputs, the range of representable bits is smaller and
    199   // the position of onebit and halfbit depends on the exponent of the
    200   // input, but the method is otherwise similar.
    201   //
    202   //   onebit(frac)
    203   //     |
    204   //     | halfbit(frac)          halfbit(adjusted)
    205   //     | /                      /
    206   //     | |                      |
    207   //  0b00.0 (exact)      -> 0b00.0 (exact)                    -> 0b00
    208   //  0b00.0...           -> 0b00.0...                         -> 0b00
    209   //  0b00.1 (exact)      -> 0b00.0111..111                    -> 0b00
    210   //  0b00.1...           -> 0b00.1...                         -> 0b01
    211   //  0b01.0 (exact)      -> 0b01.0 (exact)                    -> 0b01
    212   //  0b01.0...           -> 0b01.0...                         -> 0b01
    213   //  0b01.1 (exact)      -> 0b01.1 (exact)                    -> 0b10
    214   //  0b01.1...           -> 0b01.1...                         -> 0b10
    215   //  0b10.0 (exact)      -> 0b10.0 (exact)                    -> 0b10
    216   //  0b10.0...           -> 0b10.0...                         -> 0b10
    217   //  0b10.1 (exact)      -> 0b10.0111..111                    -> 0b10
    218   //  0b10.1...           -> 0b10.1...                         -> 0b11
    219   //  0b11.0 (exact)      -> 0b11.0 (exact)                    -> 0b11
    220   //  ...                   /             |                      /   |
    221   //                       /              |                     /    |
    222   //                                                           /     |
    223   // adjusted = frac - (halfbit(mantissa) & ~onebit(frac));   /      |
    224   //
    225   //                   mantissa = (mantissa >> shift) + halfbit(adjusted);
    226 
    227   static const int mantissa_offset = 0;
    228   static const int exponent_offset = mantissa_offset + mbits;
    229   static const int sign_offset = exponent_offset + ebits;
    230   VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
    231 
    232   // Bail out early for zero inputs.
    233   if (mantissa == 0) {
    234     return sign << sign_offset;
    235   }
    236 
    237   // If all bits in the exponent are set, the value is infinite or NaN.
    238   // This is true for all binary IEEE-754 formats.
    239   static const int infinite_exponent = (1 << ebits) - 1;
    240   static const int max_normal_exponent = infinite_exponent - 1;
    241 
    242   // Apply the exponent bias to encode it for the result. Doing this early makes
    243   // it easy to detect values that will be infinite or subnormal.
    244   exponent += max_normal_exponent >> 1;
    245 
    246   if (exponent > max_normal_exponent) {
    247     // Overflow: the input is too large for the result type to represent.
    248     if (round_mode == FPTieEven) {
    249       // FPTieEven rounding mode handles overflows using infinities.
    250       exponent = infinite_exponent;
    251       mantissa = 0;
    252     } else {
    253       VIXL_ASSERT(round_mode == FPRoundOdd);
    254       // FPRoundOdd rounding mode handles overflows using the largest magnitude
    255       // normal number.
    256       exponent = max_normal_exponent;
    257       mantissa = (UINT64_C(1) << exponent_offset) - 1;
    258     }
    259     return (sign << sign_offset) |
    260            (exponent << exponent_offset) |
    261            (mantissa << mantissa_offset);
    262   }
    263 
    264   // Calculate the shift required to move the top mantissa bit to the proper
    265   // place in the destination type.
    266   const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
    267   int shift = highest_significant_bit - mbits;
    268 
    269   if (exponent <= 0) {
    270     // The output will be subnormal (before rounding).
    271     // For subnormal outputs, the shift must be adjusted by the exponent. The +1
    272     // is necessary because the exponent of a subnormal value (encoded as 0) is
    273     // the same as the exponent of the smallest normal value (encoded as 1).
    274     shift += -exponent + 1;
    275 
    276     // Handle inputs that would produce a zero output.
    277     //
    278     // Shifts higher than highest_significant_bit+1 will always produce a zero
    279     // result. A shift of exactly highest_significant_bit+1 might produce a
    280     // non-zero result after rounding.
    281     if (shift > (highest_significant_bit + 1)) {
    282       if (round_mode == FPTieEven) {
    283         // The result will always be +/-0.0.
    284         return sign << sign_offset;
    285       } else {
    286         VIXL_ASSERT(round_mode == FPRoundOdd);
    287         VIXL_ASSERT(mantissa != 0);
    288         // For FPRoundOdd, if the mantissa is too small to represent and
    289         // non-zero return the next "odd" value.
    290         return (sign << sign_offset) | 1;
    291       }
    292     }
    293 
    294     // Properly encode the exponent for a subnormal output.
    295     exponent = 0;
    296   } else {
    297     // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
    298     // normal values.
    299     mantissa &= ~(UINT64_C(1) << highest_significant_bit);
    300   }
    301 
    302   if (shift > 0) {
    303     if (round_mode == FPTieEven) {
    304       // We have to shift the mantissa to the right. Some precision is lost, so
    305       // we need to apply rounding.
    306       uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
    307       uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1;
    308       uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
    309       uint64_t adjusted = mantissa - adjustment;
    310       T halfbit_adjusted = (adjusted >> (shift-1)) & 1;
    311 
    312       T result = (sign << sign_offset) |
    313                  (exponent << exponent_offset) |
    314                  ((mantissa >> shift) << mantissa_offset);
    315 
    316       // A very large mantissa can overflow during rounding. If this happens,
    317       // the exponent should be incremented and the mantissa set to 1.0
    318       // (encoded as 0). Applying halfbit_adjusted after assembling the float
    319       // has the nice side-effect that this case is handled for free.
    320       //
    321       // This also handles cases where a very large finite value overflows to
    322       // infinity, or where a very large subnormal value overflows to become
    323       // normal.
    324       return result + halfbit_adjusted;
    325     } else {
    326       VIXL_ASSERT(round_mode == FPRoundOdd);
    327       // If any bits at position halfbit or below are set, onebit (ie. the
    328       // bottom bit of the resulting mantissa) must be set.
    329       uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
    330       if (fractional_bits != 0) {
    331         mantissa |= UINT64_C(1) << shift;
    332       }
    333 
    334       return (sign << sign_offset) |
    335              (exponent << exponent_offset) |
    336              ((mantissa >> shift) << mantissa_offset);
    337     }
    338   } else {
    339     // We have to shift the mantissa to the left (or not at all). The input
    340     // mantissa is exactly representable in the output mantissa, so apply no
    341     // rounding correction.
    342     return (sign << sign_offset) |
    343            (exponent << exponent_offset) |
    344            ((mantissa << -shift) << mantissa_offset);
    345   }
    346 }
    347 
    348 
    349 // Representation of memory, with typed getters and setters for access.
    350 class Memory {
    351  public:
    352   template <typename T>
    353   static T AddressUntag(T address) {
    354     // Cast the address using a C-style cast. A reinterpret_cast would be
    355     // appropriate, but it can't cast one integral type to another.
    356     uint64_t bits = (uint64_t)address;
    357     return (T)(bits & ~kAddressTagMask);
    358   }
    359 
    360   template <typename T, typename A>
    361   static T Read(A address) {
    362     T value;
    363     address = AddressUntag(address);
    364     VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
    365                 (sizeof(value) == 4) || (sizeof(value) == 8) ||
    366                 (sizeof(value) == 16));
    367     memcpy(&value, reinterpret_cast<const char *>(address), sizeof(value));
    368     return value;
    369   }
    370 
    371   template <typename T, typename A>
    372   static void Write(A address, T value) {
    373     address = AddressUntag(address);
    374     VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
    375                 (sizeof(value) == 4) || (sizeof(value) == 8) ||
    376                 (sizeof(value) == 16));
    377     memcpy(reinterpret_cast<char *>(address), &value, sizeof(value));
    378   }
    379 };
    380 
    381 // Represent a register (r0-r31, v0-v31).
    382 template<int kSizeInBytes>
    383 class SimRegisterBase {
    384  public:
    385   SimRegisterBase() : written_since_last_log_(false) {}
    386 
    387   // Write the specified value. The value is zero-extended if necessary.
    388   template<typename T>
    389   void Set(T new_value) {
    390     VIXL_STATIC_ASSERT(sizeof(new_value) <= kSizeInBytes);
    391     if (sizeof(new_value) < kSizeInBytes) {
    392       // All AArch64 registers are zero-extending.
    393       memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value));
    394     }
    395     memcpy(value_, &new_value, sizeof(new_value));
    396     NotifyRegisterWrite();
    397   }
    398 
    399   // Insert a typed value into a register, leaving the rest of the register
    400   // unchanged. The lane parameter indicates where in the register the value
    401   // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
    402   // 0 represents the least significant bits.
    403   template<typename T>
    404   void Insert(int lane, T new_value) {
    405     VIXL_ASSERT(lane >= 0);
    406     VIXL_ASSERT((sizeof(new_value) +
    407                  (lane * sizeof(new_value))) <= kSizeInBytes);
    408     memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value));
    409     NotifyRegisterWrite();
    410   }
    411 
    412   // Read the value as the specified type. The value is truncated if necessary.
    413   template<typename T>
    414   T Get(int lane = 0) const {
    415     T result;
    416     VIXL_ASSERT(lane >= 0);
    417     VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes);
    418     memcpy(&result, &value_[lane * sizeof(result)], sizeof(result));
    419     return result;
    420   }
    421 
    422   // TODO: Make this return a map of updated bytes, so that we can highlight
    423   // updated lanes for load-and-insert. (That never happens for scalar code, but
    424   // NEON has some instructions that can update individual lanes.)
    425   bool WrittenSinceLastLog() const {
    426     return written_since_last_log_;
    427   }
    428 
    429   void NotifyRegisterLogged() {
    430     written_since_last_log_ = false;
    431   }
    432 
    433  protected:
    434   uint8_t value_[kSizeInBytes];
    435 
    436   // Helpers to aid with register tracing.
    437   bool written_since_last_log_;
    438 
    439   void NotifyRegisterWrite() {
    440     written_since_last_log_ = true;
    441   }
    442 };
    443 typedef SimRegisterBase<kXRegSizeInBytes> SimRegister;      // r0-r31
    444 typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister;     // v0-v31
    445 
    446 // Representation of a vector register, with typed getters and setters for lanes
    447 // and additional information to represent lane state.
    448 class LogicVRegister {
    449  public:
    450   inline LogicVRegister(SimVRegister& other)  // NOLINT
    451       : register_(other) {
    452     for (unsigned i = 0; i < sizeof(saturated_) / sizeof(saturated_[0]); i++) {
    453       saturated_[i] = kNotSaturated;
    454     }
    455     for (unsigned i = 0; i < sizeof(round_) / sizeof(round_[0]); i++) {
    456       round_[i] = 0;
    457     }
    458   }
    459 
    460   int64_t Int(VectorFormat vform, int index) const {
    461     int64_t element;
    462     switch (LaneSizeInBitsFromFormat(vform)) {
    463       case 8: element = register_.Get<int8_t>(index); break;
    464       case 16: element = register_.Get<int16_t>(index); break;
    465       case 32: element = register_.Get<int32_t>(index); break;
    466       case 64: element = register_.Get<int64_t>(index); break;
    467       default: VIXL_UNREACHABLE(); return 0;
    468     }
    469     return element;
    470   }
    471 
    472   uint64_t Uint(VectorFormat vform, int index) const {
    473     uint64_t element;
    474     switch (LaneSizeInBitsFromFormat(vform)) {
    475       case 8: element = register_.Get<uint8_t>(index); break;
    476       case 16: element = register_.Get<uint16_t>(index); break;
    477       case 32: element = register_.Get<uint32_t>(index); break;
    478       case 64: element = register_.Get<uint64_t>(index); break;
    479       default: VIXL_UNREACHABLE(); return 0;
    480     }
    481     return element;
    482   }
    483 
    484   int64_t IntLeftJustified(VectorFormat vform, int index) const {
    485     return Int(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
    486   }
    487 
    488   uint64_t UintLeftJustified(VectorFormat vform, int index) const {
    489     return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
    490   }
    491 
    492   void SetInt(VectorFormat vform, int index, int64_t value) const {
    493     switch (LaneSizeInBitsFromFormat(vform)) {
    494       case 8: register_.Insert(index, static_cast<int8_t>(value)); break;
    495       case 16: register_.Insert(index, static_cast<int16_t>(value)); break;
    496       case 32: register_.Insert(index, static_cast<int32_t>(value)); break;
    497       case 64: register_.Insert(index, static_cast<int64_t>(value)); break;
    498       default: VIXL_UNREACHABLE(); return;
    499     }
    500   }
    501 
    502   void SetUint(VectorFormat vform, int index, uint64_t value) const {
    503     switch (LaneSizeInBitsFromFormat(vform)) {
    504       case 8: register_.Insert(index, static_cast<uint8_t>(value)); break;
    505       case 16: register_.Insert(index, static_cast<uint16_t>(value)); break;
    506       case 32: register_.Insert(index, static_cast<uint32_t>(value)); break;
    507       case 64: register_.Insert(index, static_cast<uint64_t>(value)); break;
    508       default: VIXL_UNREACHABLE(); return;
    509     }
    510   }
    511 
    512   void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const {
    513     switch (LaneSizeInBitsFromFormat(vform)) {
    514       case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); break;
    515       case 16: register_.Insert(index, Memory::Read<uint16_t>(addr)); break;
    516       case 32: register_.Insert(index, Memory::Read<uint32_t>(addr)); break;
    517       case 64: register_.Insert(index, Memory::Read<uint64_t>(addr)); break;
    518       default: VIXL_UNREACHABLE(); return;
    519     }
    520   }
    521 
    522   void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const {
    523     switch (LaneSizeInBitsFromFormat(vform)) {
    524       case 8: Memory::Write<uint8_t>(addr, Uint(vform, index)); break;
    525       case 16: Memory::Write<uint16_t>(addr, Uint(vform, index)); break;
    526       case 32: Memory::Write<uint32_t>(addr, Uint(vform, index)); break;
    527       case 64: Memory::Write<uint64_t>(addr, Uint(vform, index)); break;
    528     }
    529   }
    530 
    531   template <typename T>
    532   T Float(int index) const {
    533     return register_.Get<T>(index);
    534   }
    535 
    536   template <typename T>
    537   void SetFloat(int index, T value) const {
    538     register_.Insert(index, value);
    539   }
    540 
    541   // When setting a result in a register of size less than Q, the top bits of
    542   // the Q register must be cleared.
    543   void ClearForWrite(VectorFormat vform) const {
    544     unsigned size = RegisterSizeInBytesFromFormat(vform);
    545     for (unsigned i = size; i < kQRegSizeInBytes; i++) {
    546       SetUint(kFormat16B, i, 0);
    547     }
    548   }
    549 
    550   // Saturation state for each lane of a vector.
    551   enum Saturation {
    552     kNotSaturated = 0,
    553     kSignedSatPositive = 1 << 0,
    554     kSignedSatNegative = 1 << 1,
    555     kSignedSatMask = kSignedSatPositive | kSignedSatNegative,
    556     kSignedSatUndefined = kSignedSatMask,
    557     kUnsignedSatPositive = 1 << 2,
    558     kUnsignedSatNegative = 1 << 3,
    559     kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,
    560     kUnsignedSatUndefined = kUnsignedSatMask
    561   };
    562 
    563   // Getters for saturation state.
    564   Saturation GetSignedSaturation(int index) {
    565     return static_cast<Saturation>(saturated_[index] & kSignedSatMask);
    566   }
    567 
    568   Saturation GetUnsignedSaturation(int index) {
    569     return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);
    570   }
    571 
    572   // Setters for saturation state.
    573   void ClearSat(int index) {
    574     saturated_[index] = kNotSaturated;
    575   }
    576 
    577   void SetSignedSat(int index, bool positive) {
    578     SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);
    579   }
    580 
    581   void SetUnsignedSat(int index, bool positive) {
    582     SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);
    583   }
    584 
    585   void SetSatFlag(int index, Saturation sat) {
    586     saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);
    587     VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined);
    588     VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined);
    589   }
    590 
    591   // Saturate lanes of a vector based on saturation state.
    592   LogicVRegister& SignedSaturate(VectorFormat vform) {
    593     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    594       Saturation sat = GetSignedSaturation(i);
    595       if (sat == kSignedSatPositive) {
    596         SetInt(vform, i, MaxIntFromFormat(vform));
    597       } else if (sat == kSignedSatNegative) {
    598         SetInt(vform, i, MinIntFromFormat(vform));
    599       }
    600     }
    601     return *this;
    602   }
    603 
    604   LogicVRegister& UnsignedSaturate(VectorFormat vform) {
    605     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    606       Saturation sat = GetUnsignedSaturation(i);
    607       if (sat == kUnsignedSatPositive) {
    608         SetUint(vform, i, MaxUintFromFormat(vform));
    609       } else if (sat == kUnsignedSatNegative) {
    610         SetUint(vform, i, 0);
    611       }
    612     }
    613     return *this;
    614   }
    615 
    616   // Getter for rounding state.
    617   bool GetRounding(int index) {
    618     return round_[index];
    619   }
    620 
    621   // Setter for rounding state.
    622   void SetRounding(int index, bool round) {
    623     round_[index] = round;
    624   }
    625 
    626   // Round lanes of a vector based on rounding state.
    627   LogicVRegister& Round(VectorFormat vform) {
    628     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    629       SetInt(vform, i, Int(vform, i) + (GetRounding(i) ? 1 : 0));
    630     }
    631     return *this;
    632   }
    633 
    634   // Unsigned halve lanes of a vector, and use the saturation state to set the
    635   // top bit.
    636   LogicVRegister& Uhalve(VectorFormat vform) {
    637     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    638       uint64_t val = Uint(vform, i);
    639       SetRounding(i, (val & 1) == 1);
    640       val >>= 1;
    641       if (GetUnsignedSaturation(i) != kNotSaturated) {
    642         // If the operation causes unsigned saturation, the bit shifted into the
    643         // most significant bit must be set.
    644         val |= (MaxUintFromFormat(vform) >> 1) + 1;
    645       }
    646       SetInt(vform, i, val);
    647     }
    648     return *this;
    649   }
    650 
    651   // Signed halve lanes of a vector, and use the carry state to set the top bit.
    652   LogicVRegister& Halve(VectorFormat vform) {
    653     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    654       int64_t val = Int(vform, i);
    655       SetRounding(i, (val & 1) == 1);
    656       val >>= 1;
    657       if (GetSignedSaturation(i) != kNotSaturated) {
    658         // If the operation causes signed saturation, the sign bit must be
    659         // inverted.
    660         val ^= (MaxUintFromFormat(vform) >> 1) + 1;
    661       }
    662       SetInt(vform, i, val);
    663     }
    664     return *this;
    665   }
    666 
    667  private:
    668   SimVRegister& register_;
    669 
    670   // Allocate one saturation state entry per lane; largest register is type Q,
    671   // and lanes can be a minimum of one byte wide.
    672   Saturation saturated_[kQRegSizeInBytes];
    673 
    674   // Allocate one rounding state entry per lane.
    675   bool round_[kQRegSizeInBytes];
    676 };
    677 
    678 // The proper way to initialize a simulated system register (such as NZCV) is as
    679 // follows:
    680 //  SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV);
    681 class SimSystemRegister {
    682  public:
    683   // The default constructor represents a register which has no writable bits.
    684   // It is not possible to set its value to anything other than 0.
    685   SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) { }
    686
          // Return the whole 32-bit register value.
    687   uint32_t RawValue() const {
    688     return value_;
    689   }
    690
          // Replace the register value. Bits that are set in write_ignore_mask_ keep
          // their current value; only the remaining bits are taken from new_value.
    691   void SetRawValue(uint32_t new_value) {
    692     value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_);
    693   }
    694
          // Extract the field between msb and lsb using unsigned_bitextract_32().
    695   uint32_t Bits(int msb, int lsb) const {
    696     return unsigned_bitextract_32(msb, lsb, value_);
    697   }
    698
          // Extract the field between msb and lsb using signed_bitextract_32().
    699   int32_t SignedBits(int msb, int lsb) const {
    700     return signed_bitextract_32(msb, lsb, value_);
    701   }
    702
          // Write 'bits' to the field between msb and lsb (defined out of line).
    703   void SetBits(int msb, int lsb, uint32_t bits);
    704
    705   // Default system register values.
    706   static SimSystemRegister DefaultValueFor(SystemRegister id);
    707
        // DEFINE_GETTER generates a Name() reader and a SetName() writer for one
        // field; DEFINE_WRITE_IGNORE_MASK generates a NameWriteIgnoreMask constant
        // that is the complement of Mask. Both are expanded by
        // SYSTEM_REGISTER_FIELDS_LIST.
    708 #define DEFINE_GETTER(Name, HighBit, LowBit, Func)                            \
    709   uint32_t Name() const { return Func(HighBit, LowBit); }              \
    710   void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
    711 #define DEFINE_WRITE_IGNORE_MASK(Name, Mask)                                  \
    712   static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask);
    713
    714   SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK)
    715
        // Undefine both helper macros defined above, so that neither of them leaks
        // into code that includes this header.
    716 #undef DEFINE_WRITE_IGNORE_MASK
    717 #undef DEFINE_GETTER
    718
    719  protected:
    720   // Most system registers only implement a few of the bits in the word. Other
    721   // bits are "read-as-zero, write-ignored". The write_ignore_mask argument
    722   // describes the bits which are not modifiable.
    723   SimSystemRegister(uint32_t value, uint32_t write_ignore_mask)
    724       : value_(value), write_ignore_mask_(write_ignore_mask) { }
    725
          // Current register contents.
    726   uint32_t value_;
          // Bits set here are preserved by SetRawValue().
    727   uint32_t write_ignore_mask_;
    728 };
    729 
    730 
    731 class SimExclusiveLocalMonitor {
          // Remembers the single (address, size) pair most recently passed to
          // MarkExclusive(), and answers IsExclusive() queries against it.
    732  public:
    733   SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) {
    734     Clear();
    735   }
    736
    737   // Clear the exclusive monitor (like clrex).
    738   void Clear() {
    739     address_ = 0;
    740     size_ = 0;
    741   }
    742
    743   // Clear the exclusive monitor most of the time. The clear is skipped only
          // when seed_ is a multiple of kSkipClearProbability; seed_ is advanced on
          // every call, whether or not the monitor was cleared.
    744   void MaybeClear() {
    745     if ((seed_ % kSkipClearProbability) != 0) {
    746       Clear();
    747     }
    748
    749     // Advance seed_ using a simple linear congruential generator.
            // NOTE(review): seed_ is a uint32_t, so where int is 32 bits wide the
            // multiplication wraps modulo 2^32 before the remainder is taken.
    750     seed_ = (seed_ * 48271) % 2147483647;
    751   }
    752
    753   // Mark the address range for exclusive access (like load-exclusive).
    754   void MarkExclusive(uint64_t address, size_t size) {
    755     address_ = address;
    756     size_ = size;
    757   }
    758
    759   // Return true if the address range is marked (like store-exclusive).
    760   // This helper doesn't implicitly clear the monitor.
          // Clear() stores a size of 0, so a query with a non-zero size never matches
          // a cleared monitor.
    761   bool IsExclusive(uint64_t address, size_t size) {
    762     VIXL_ASSERT(size > 0);
    763     // Be pedantic: Require both the address and the size to match.
    764     return (size == size_) && (address == address_);
    765   }
    766
    767  private:
          // The marked range; both fields are 0 after Clear().
    768   uint64_t address_;
    769   size_t size_;
    770
          // MaybeClear() skips the clear when seed_ is a multiple of this value.
    771   const int kSkipClearProbability;
          // State of the pseudo-random sequence used by MaybeClear().
    772   uint32_t seed_;
    773 };
    774 
    775 
    776 // We can't accurately simulate the global monitor since it depends on external
    777 // influences. Instead, this implementation occasionally causes accesses to
    778 // fail, according to kPassProbability.
    779 class SimExclusiveGlobalMonitor {
    780  public:
    781   SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {}
    782
          // Return false when seed_ is a multiple of kPassProbability, and true
          // otherwise. The address and size arguments are ignored. seed_ is advanced
          // on every call.
    783   bool IsExclusive(uint64_t address, size_t size) {
    784     USE(address);
    785     USE(size);
    786
    787     bool pass = (seed_ % kPassProbability) != 0;
    788     // Advance seed_ using a simple linear congruential generator.
            // NOTE(review): seed_ is a uint32_t, so where int is 32 bits wide the
            // multiplication wraps modulo 2^32 before the remainder is taken.
    789     seed_ = (seed_ * 48271) % 2147483647;
    790     return pass;
    791   }
    792
    793  private:
          // IsExclusive() fails when seed_ is a multiple of this value.
    794   const int kPassProbability;
          // State of the pseudo-random sequence used by IsExclusive().
    795   uint32_t seed_;
    796 };
    797 
    798 
    799 class Simulator : public DecoderVisitor {
    800  public:
    801   explicit Simulator(Decoder* decoder, FILE* stream = stdout);
    802   ~Simulator();
    803 
    804   void ResetState();
    805 
    806   // Run the simulator.
    807   virtual void Run();
    808   void RunFrom(const Instruction* first);
    809 
    810   // Simulation helpers.
    811   const Instruction* pc() const { return pc_; }  // The value of pc_.
    812   void set_pc(const Instruction* new_pc) {
            // Store new_pc after passing it through Memory::AddressUntag(), and set
            // pc_modified_ so that the next increment_pc() does not step past it.
    813     pc_ = Memory::AddressUntag(new_pc);
    814     pc_modified_ = true;
    815   }
    816 
    817   void increment_pc() {
            // Step to the next instruction, unless set_pc() has been called since the
            // previous increment_pc(). pc_modified_ is cleared in both cases.
    818     if (!pc_modified_) {
    819       pc_ = pc_->NextInstruction();
    820     }
    821
    822     pc_modified_ = false;
    823   }
    824 
    825   void ExecuteInstruction() {
            // Decode the instruction at pc_ with decoder_, then update pc_.
    826     // The program counter should always be aligned.
    827     VIXL_ASSERT(IsWordAligned(pc_));
    828     decoder_->Decode(pc_);
    829     increment_pc();
    830   }
    831 
    832   // Declare all Visitor functions.
    833   #define DECLARE(A) virtual void Visit##A(const Instruction* instr);
    834   VISITOR_LIST(DECLARE)
    835   #undef DECLARE
    836 
    837   // Integer register accessors.
    838 
    839   // Basic accessor: Read the register as the specified type. Register code 31
          // reads as an all-zero T when r31mode is Reg31IsZeroRegister; with any other
          // r31mode, registers_[31] is read like every other register.
    840   template<typename T>
    841   T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
    842     VIXL_ASSERT(code < kNumberOfRegisters);
    843     if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
    844       T result;
    845       memset(&result, 0, sizeof(result));
    846       return result;
    847     }
    848     return registers_[code].Get<T>();
    849   }
    850 
    851   // Common specialized accessors for the reg() template.
          // wreg() reads the register as an int32_t.
    852   int32_t wreg(unsigned code,
    853                Reg31Mode r31mode = Reg31IsZeroRegister) const {
    854     return reg<int32_t>(code, r31mode);
    855   }
    856 
    857   int64_t xreg(unsigned code,
    858                Reg31Mode r31mode = Reg31IsZeroRegister) const {
            // Read the register as an int64_t.
    859     return reg<int64_t>(code, r31mode);
    860   }
    861 
    862   // As above, with parameterized size and return type. The value is
    863   // either zero-extended or truncated to fit, as required.
          // 'size' must be kWRegSize or kXRegSize; any other value reaches
          // VIXL_UNREACHABLE(). T must be no larger than a uint64_t.
    864   template<typename T>
    865   T reg(unsigned size, unsigned code,
    866         Reg31Mode r31mode = Reg31IsZeroRegister) const {
    867     uint64_t raw;
    868     switch (size) {
              // Reading as an unsigned type zero-extends the value into 'raw'.
    869       case kWRegSize: raw = reg<uint32_t>(code, r31mode); break;
    870       case kXRegSize: raw = reg<uint64_t>(code, r31mode); break;
    871       default:
    872         VIXL_UNREACHABLE();
    873         return 0;
    874     }
    875
    876     T result;
    877     VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
    878     // Copy the result and truncate to fit. This assumes a little-endian host.
    879     memcpy(&result, &raw, sizeof(result));
    880     return result;
    881   }
    882 
    883   // Use int64_t by default if T is not specified. This forwards to the sized
          // reg<T>() template with T = int64_t.
    884   int64_t reg(unsigned size, unsigned code,
    885               Reg31Mode r31mode = Reg31IsZeroRegister) const {
    886     return reg<int64_t>(size, code, r31mode);
    887   }
    888 
    889   enum RegLogMode {
    890     LogRegWrites,  // The register setters call the Log*Register helper.
    891     NoRegLog       // The register setters write without logging.
    892   };
    893 
    894   // Write 'value' into an integer register. The value is zero-extended. This
    895   // behaviour matches AArch64 register writes.
          // T must be W-sized or X-sized. A write to register code 31 is discarded
          // (and not logged) when r31mode is Reg31IsZeroRegister.
    896   template<typename T>
    897   void set_reg(unsigned code, T value,
    898                RegLogMode log_mode = LogRegWrites,
    899                Reg31Mode r31mode = Reg31IsZeroRegister) {
    900     VIXL_STATIC_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
    901                        (sizeof(T) == kXRegSizeInBytes));
    902     VIXL_ASSERT(code < kNumberOfRegisters);
    903
    904     if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
    905       return;
    906     }
    907
    908     registers_[code].Set(value);
    909
    910     if (log_mode == LogRegWrites) LogRegister(code, r31mode);
    911   }
    912 
    913   // Common specialized accessors for the set_reg() template.
          // set_wreg() writes an int32_t value.
    914   void set_wreg(unsigned code, int32_t value,
    915                 RegLogMode log_mode = LogRegWrites,
    916                 Reg31Mode r31mode = Reg31IsZeroRegister) {
    917     set_reg(code, value, log_mode, r31mode);
    918   }
    919 
    920   void set_xreg(unsigned code, int64_t value,
    921                 RegLogMode log_mode = LogRegWrites,
    922                 Reg31Mode r31mode = Reg31IsZeroRegister) {
            // Write an int64_t value through the set_reg() template.
    923     set_reg(code, value, log_mode, r31mode);
    924   }
    925 
    926   // As above, with parameterized size and type. The value is either
    927   // zero-extended or truncated to fit, as required.
          // 'size' must be kWRegSize or kXRegSize; any other value reaches
          // VIXL_UNREACHABLE(). T must be no larger than a uint64_t.
    928   template<typename T>
    929   void set_reg(unsigned size, unsigned code, T value,
    930                RegLogMode log_mode = LogRegWrites,
    931                Reg31Mode r31mode = Reg31IsZeroRegister) {
    932     // Zero-extend the input.
            // The bytes of 'value' are copied over the start of a zeroed uint64_t.
    933     uint64_t raw = 0;
    934     VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw));
    935     memcpy(&raw, &value, sizeof(value));
    936
    937     // Write (and possibly truncate) the value.
    938     switch (size) {
    939       case kWRegSize: set_reg<uint32_t>(code, raw, log_mode, r31mode); break;
    940       case kXRegSize: set_reg<uint64_t>(code, raw, log_mode, r31mode); break;
    941       default:
    942         VIXL_UNREACHABLE();
    943         return;
    944     }
    945   }
    946 
    947   // Common specialized accessors for the set_reg() template.
    948 
    949   // Commonly-used special cases.
          // set_lr() writes 'value' to the register with code kLinkRegCode.
    950   template<typename T>
    951   void set_lr(T value) {
    952     set_reg(kLinkRegCode, value);
    953   }
    954 
    955   template<typename T>
    956   void set_sp(T value) {
            // Register code 31 with Reg31IsStackPointer, so the write is not discarded
            // as a zero-register write. The write is always logged.
    957     set_reg(31, value, LogRegWrites, Reg31IsStackPointer);
    958   }
    959 
    960   // Vector register accessors.
    961   // These are equivalent to the integer register accessors, but for vector
    962   // registers.
    963 
    964   // A structure for representing a 128-bit Q register.
    965   struct qreg_t { uint8_t val[kQRegSizeInBytes]; };
    966 
    967   // Basic accessor: read the register as the specified type.
          // sizeof(T) must equal the size of a B, H, S, D or Q register.
    968   template<typename T>
    969   T vreg(unsigned code) const {
    970     VIXL_STATIC_ASSERT((sizeof(T) == kBRegSizeInBytes) ||
    971                        (sizeof(T) == kHRegSizeInBytes) ||
    972                        (sizeof(T) == kSRegSizeInBytes) ||
    973                        (sizeof(T) == kDRegSizeInBytes) ||
    974                        (sizeof(T) == kQRegSizeInBytes));
    975     VIXL_ASSERT(code < kNumberOfVRegisters);
    976
    977     return vregisters_[code].Get<T>();
    978   }
    979 
    980   // Common specialized accessors for the vreg() template.
          // Each one reads vector register 'code' as the type named in its return
          // type. The *_bits variants return the same-sized unsigned integer type
          // instead of the floating-point type.
    981   int8_t breg(unsigned code) const {
    982     return vreg<int8_t>(code);
    983   }
    984
    985   int16_t hreg(unsigned code) const {
    986     return vreg<int16_t>(code);
    987   }
    988
    989   float sreg(unsigned code) const {
    990     return vreg<float>(code);
    991   }
    992
    993   uint32_t sreg_bits(unsigned code) const {
    994     return vreg<uint32_t>(code);
    995   }
    996
    997   double dreg(unsigned code) const {
    998     return vreg<double>(code);
    999   }
   1000
   1001   uint64_t dreg_bits(unsigned code) const {
   1002     return vreg<uint64_t>(code);
   1003   }
   1004
   1005   qreg_t qreg(unsigned code)  const {
   1006     return vreg<qreg_t>(code);
   1007   }
   1008 
   1009   // As above, with parameterized size and return type. The value is
   1010   // either zero-extended or truncated to fit, as required.
          // Only kSRegSize and kDRegSize are handled. Any other size reaches
          // VIXL_UNREACHABLE(); if that returns, 'raw' is still 0 when it is copied.
   1011   template<typename T>
   1012   T vreg(unsigned size, unsigned code) const {
   1013     uint64_t raw = 0;
   1014     T result;
   1015
   1016     switch (size) {
   1017       case kSRegSize: raw = vreg<uint32_t>(code); break;
   1018       case kDRegSize: raw = vreg<uint64_t>(code); break;
   1019       default:
   1020         VIXL_UNREACHABLE();
   1021         break;
   1022     }
   1023
   1024     VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
   1025     // Copy the result and truncate to fit. This assumes a little-endian host.
   1026     memcpy(&result, &raw, sizeof(result));
   1027     return result;
   1028   }
   1029 
   1030   inline SimVRegister& vreg(unsigned code) {
            // Return a mutable reference to the whole simulated vector register.
            // The register code is range-checked in the same way as in the other
            // vreg() and set_vreg() accessors.
            VIXL_ASSERT(code < kNumberOfVRegisters);
   1031     return vregisters_[code];
   1032   }
   1033 
   1034   // Basic accessor: Write the specified value.
          // sizeof(value) must equal the size of a B, H, S, D or Q register. When
          // log_mode is LogRegWrites, the write is passed to LogVRegister() with the
          // format that GetPrintRegisterFormat() selects for 'value'.
   1035   template<typename T>
   1036   void set_vreg(unsigned code, T value,
   1037                 RegLogMode log_mode = LogRegWrites) {
   1038     VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) ||
   1039                        (sizeof(value) == kHRegSizeInBytes) ||
   1040                        (sizeof(value) == kSRegSizeInBytes) ||
   1041                        (sizeof(value) == kDRegSizeInBytes) ||
   1042                        (sizeof(value) == kQRegSizeInBytes));
   1043     VIXL_ASSERT(code < kNumberOfVRegisters);
   1044     vregisters_[code].Set(value);
   1045
   1046     if (log_mode == LogRegWrites) {
   1047       LogVRegister(code, GetPrintRegisterFormat(value));
   1048     }
   1049   }
   1050 
   1051   // Common specialized accessors for the set_vreg() template.
          // Each one forwards to set_vreg() with the value type named in its
          // signature. The *_bits variants take the same-sized unsigned integer type
          // instead of the floating-point type.
   1052   void set_breg(unsigned code, int8_t value,
   1053                 RegLogMode log_mode = LogRegWrites) {
   1054     set_vreg(code, value, log_mode);
   1055   }
   1056
   1057   void set_hreg(unsigned code, int16_t value,
   1058                 RegLogMode log_mode = LogRegWrites) {
   1059     set_vreg(code, value, log_mode);
   1060   }
   1061
   1062   void set_sreg(unsigned code, float value,
   1063                 RegLogMode log_mode = LogRegWrites) {
   1064     set_vreg(code, value, log_mode);
   1065   }
   1066
   1067   void set_sreg_bits(unsigned code, uint32_t value,
   1068                 RegLogMode log_mode = LogRegWrites) {
   1069     set_vreg(code, value, log_mode);
   1070   }
   1071
   1072   void set_dreg(unsigned code, double value,
   1073                 RegLogMode log_mode = LogRegWrites) {
   1074     set_vreg(code, value, log_mode);
   1075   }
   1076
   1077   void set_dreg_bits(unsigned code, uint64_t value,
   1078                 RegLogMode log_mode = LogRegWrites) {
   1079     set_vreg(code, value, log_mode);
   1080   }
   1081
   1082   void set_qreg(unsigned code, qreg_t value,
   1083                 RegLogMode log_mode = LogRegWrites) {
   1084     set_vreg(code, value, log_mode);
   1085   }
   1086 
   1087   bool N() const { return nzcv_.N() != 0; }  // True if nzcv_'s N field is set.
   1088   bool Z() const { return nzcv_.Z() != 0; }  // True if nzcv_'s Z field is set.
   1089   bool C() const { return nzcv_.C() != 0; }  // True if nzcv_'s C field is set.
   1090   bool V() const { return nzcv_.V() != 0; }  // True if nzcv_'s V field is set.
   1091   SimSystemRegister& nzcv() { return nzcv_; }  // Mutable access to nzcv_.
   1092 
   1093   // TODO: Find a way to make the fpcr_ members return the proper types, so
   1094   // these accessors are not necessary.
          // RMode() casts the RMode field of fpcr_ to FPRounding; DN() reports whether
          // the DN field of fpcr_ is non-zero; fpcr() gives mutable access to fpcr_.
   1095   FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); }
   1096   bool DN() { return fpcr_.DN() != 0; }
   1097   SimSystemRegister& fpcr() { return fpcr_; }
   1098 
   1099   // Specify relevant register formats for Print(V)Register and related helpers.
          // A format is a bit field: bits 0-2 hold the lane size, bits 3-4 hold the
          // lane count (scalar, D vector or Q vector), and bit 5 is the FP flag.
   1100   enum PrintRegisterFormat {
   1101     // The lane size.
            // The value is log2 of the lane size in bytes; see
            // GetPrintRegLaneSizeInBytes().
   1102     kPrintRegLaneSizeB = 0 << 0,
   1103     kPrintRegLaneSizeH = 1 << 0,
   1104     kPrintRegLaneSizeS = 2 << 0,
   1105     kPrintRegLaneSizeW = kPrintRegLaneSizeS,
   1106     kPrintRegLaneSizeD = 3 << 0,
   1107     kPrintRegLaneSizeX = kPrintRegLaneSizeD,
   1108     kPrintRegLaneSizeQ = 4 << 0,
   1109
   1110     kPrintRegLaneSizeOffset = 0,
   1111     kPrintRegLaneSizeMask = 7 << 0,
   1112
   1113     // The lane count.
            // A scalar has neither vector bit set.
   1114     kPrintRegAsScalar = 0,
   1115     kPrintRegAsDVector = 1 << 3,
   1116     kPrintRegAsQVector = 2 << 3,
   1117
   1118     kPrintRegAsVectorMask = 3 << 3,
   1119
   1120     // Indicate floating-point format lanes. (This flag is only supported for S-
   1121     // and D-sized lanes.)
   1122     kPrintRegAsFP = 1 << 5,
   1123
   1124     // Supported combinations.
   1125
   1126     kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
   1127     kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,
   1128     kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
   1129     kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
   1130
   1131     kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,
   1132     kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,
   1133     kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,
   1134     kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,
   1135     kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,
   1136     kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,
   1137     kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,
   1138     kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,
   1139     kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,
   1140     kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
   1141     kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,
   1142     kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,
   1143     kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,
   1144     kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,
   1145     kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
   1146     kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,
   1147     kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar
   1148   };
   1149 
   1150   unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
            // Extract the lane-size field of 'format'.
   1151     return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;
   1152   }
   1153 
   1154   unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {
            // The lane-size field stores log2 of the size, so shift to recover it.
   1155     return 1 << GetPrintRegLaneSizeInBytesLog2(format);
   1156   }
   1157 
   1158   unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
            // Vector formats have the size of a D or a Q register, whatever their
            // lane size is.
   1159     if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2;
   1160     if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2;
   1161
   1162     // Scalar types.
            // A scalar is a single lane, so its size is the lane size.
   1163     return GetPrintRegLaneSizeInBytesLog2(format);
   1164   }
   1165 
   1166   unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
            // Convert the log2 size from GetPrintRegSizeInBytesLog2() into bytes.
   1167     return 1 << GetPrintRegSizeInBytesLog2(format);
   1168   }
   1169 
   1170   unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
            // The lane count is the register size divided by the lane size, computed
            // here as a difference of log2 values.
   1171     unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
   1172     unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
   1173     VIXL_ASSERT(reg_size_log2 >= lane_size_log2);
   1174     return 1 << (reg_size_log2 - lane_size_log2);
   1175   }
   1176 
   1177   PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size,
   1178                                                     unsigned lane_size);
   1179 
   1180   PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) {
            // Use the same value for the register size and for the lane size.
   1181     return GetPrintRegisterFormatForSize(size, size);
   1182   }
   1183 
   1184   PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
            // 'size' is in bytes and must be kDRegSizeInBytes or kSRegSizeInBytes; any
            // other value reaches VIXL_UNREACHABLE().
   1185     switch (size) {
   1186       default: VIXL_UNREACHABLE(); return kPrintDReg;
   1187       case kDRegSizeInBytes: return kPrintDReg;
   1188       case kSRegSizeInBytes: return kPrintSReg;
   1189     }
   1190   }
   1191 
   1192   PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {
            // Add the FP flag if the lanes are S-sized or D-sized; every other format
            // is returned unchanged.
   1193     if ((GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) ||
   1194         (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) {
   1195       return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);
   1196     }
   1197     return format;
   1198   }
   1199 
   1200   template<typename T>
   1201   PrintRegisterFormat GetPrintRegisterFormat(T value) {
            // Generic case: the format depends only on the size of the value's type.
   1202     return GetPrintRegisterFormatForSize(sizeof(value));
   1203   }
   1204 
   1205   PrintRegisterFormat GetPrintRegisterFormat(double value) {
            // Overload for double: select the FP format for its size.
   1206     VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes);
   1207     return GetPrintRegisterFormatForSizeFP(sizeof(value));
   1208   }
   1209 
   1210   PrintRegisterFormat GetPrintRegisterFormat(float value) {
            // Overload for float: select the FP format for its size.
   1211     VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes);
   1212     return GetPrintRegisterFormatForSizeFP(sizeof(value));
   1213   }
   1214 
   1215   PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);
   1216 
   1217   // Print all registers of the specified types.
   1218   void PrintRegisters();
   1219   void PrintVRegisters();
   1220   void PrintSystemRegisters();
   1221 
   1222   // As above, but only print the registers that have been updated.
   1223   void PrintWrittenRegisters();
   1224   void PrintWrittenVRegisters();
   1225 
   1226   // As above, but respect LOG_REGS and LOG_VREGS: each helper prints only if
          // the corresponding bit is set in trace_parameters().
   1227   void LogWrittenRegisters() {
   1228     if (trace_parameters() & LOG_REGS) PrintWrittenRegisters();
   1229   }
   1230   void LogWrittenVRegisters() {
   1231     if (trace_parameters() & LOG_VREGS) PrintWrittenVRegisters();
   1232   }
          // Log the written integer registers, then the written vector registers.
   1233   void LogAllWrittenRegisters() {
   1234     LogWrittenRegisters();
   1235     LogWrittenVRegisters();
   1236   }
   1237 
   1238   // Print individual register values (after update).
   1239   void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer);
   1240   void PrintVRegister(unsigned code, PrintRegisterFormat format);
   1241   void PrintSystemRegister(SystemRegister id);
   1242 
   1243   // Like Print* (above), but respect trace_parameters(). Integer registers are
          // gated on LOG_REGS, vector registers on LOG_VREGS, and system registers on
          // LOG_SYSREGS.
   1244   void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) {
   1245     if (trace_parameters() & LOG_REGS) PrintRegister(code, r31mode);
   1246   }
   1247   void LogVRegister(unsigned code, PrintRegisterFormat format) {
   1248     if (trace_parameters() & LOG_VREGS) PrintVRegister(code, format);
   1249   }
   1250   void LogSystemRegister(SystemRegister id) {
   1251     if (trace_parameters() & LOG_SYSREGS) PrintSystemRegister(id);
   1252   }
   1253 
   1254   // Print memory accesses.
   1255   void PrintRead(uintptr_t address, unsigned reg_code,
   1256                  PrintRegisterFormat format);
   1257   void PrintWrite(uintptr_t address, unsigned reg_code,
   1258                  PrintRegisterFormat format);
   1259   void PrintVRead(uintptr_t address, unsigned reg_code,
   1260                   PrintRegisterFormat format, unsigned lane);
   1261   void PrintVWrite(uintptr_t address, unsigned reg_code,
   1262                    PrintRegisterFormat format, unsigned lane);
   1263 
   1264   // Like Print* (above), but respect trace_parameters(). Reads are gated on
          // the flag of the register file that they update (LOG_REGS or LOG_VREGS),
          // whereas both kinds of write are gated on LOG_WRITE.
   1265   void LogRead(uintptr_t address, unsigned reg_code,
   1266                PrintRegisterFormat format) {
   1267     if (trace_parameters() & LOG_REGS) PrintRead(address, reg_code, format);
   1268   }
   1269   void LogWrite(uintptr_t address, unsigned reg_code,
   1270                 PrintRegisterFormat format) {
   1271     if (trace_parameters() & LOG_WRITE) PrintWrite(address, reg_code, format);
   1272   }
   1273   void LogVRead(uintptr_t address, unsigned reg_code,
   1274                 PrintRegisterFormat format, unsigned lane = 0) {
   1275     if (trace_parameters() & LOG_VREGS) {
   1276       PrintVRead(address, reg_code, format, lane);
   1277     }
   1278   }
   1279   void LogVWrite(uintptr_t address, unsigned reg_code,
   1280                  PrintRegisterFormat format, unsigned lane = 0) {
   1281     if (trace_parameters() & LOG_WRITE) {
   1282       PrintVWrite(address, reg_code, format, lane);
   1283     }
   1284   }
   1285 
   1286   // Helper functions for register tracing.
   1287   void PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
   1288                               int size_in_bytes = kXRegSizeInBytes);
   1289   void PrintVRegisterRawHelper(unsigned code, int bytes = kQRegSizeInBytes,
   1290                                int lsb = 0);
   1291   void PrintVRegisterFPHelper(unsigned code, unsigned lane_size_in_bytes,
   1292                               int lane_count = 1, int rightmost_lane = 0);
   1293 
   1294   void DoUnreachable(const Instruction* instr);
   1295   void DoTrace(const Instruction* instr);
   1296   void DoLog(const Instruction* instr);
   1297 
   1298   static const char* WRegNameForCode(unsigned code,
   1299                                      Reg31Mode mode = Reg31IsZeroRegister);
   1300   static const char* XRegNameForCode(unsigned code,
   1301                                      Reg31Mode mode = Reg31IsZeroRegister);
   1302   static const char* SRegNameForCode(unsigned code);
   1303   static const char* DRegNameForCode(unsigned code);
   1304   static const char* VRegNameForCode(unsigned code);
   1305 
   1306   bool coloured_trace() const { return coloured_trace_; }  // See the setter.
   1307   void set_coloured_trace(bool value);
   1308 
   1309   int trace_parameters() const { return trace_parameters_; }  // LOG_* bits.
   1310   void set_trace_parameters(int parameters);
   1311 
   1312   void set_instruction_stats(bool value);
   1313 
   1314   // Clear the simulated local monitor to force the next store-exclusive
   1315   // instruction to fail. This calls Clear() on local_monitor_.
   1316   void ClearLocalMonitor() {
   1317     local_monitor_.Clear();
   1318   }
   1319 
   1320   void SilenceExclusiveAccessWarning() {
            // Clear the flag that enables the exclusive-access warning.
   1321     print_exclusive_access_warning_ = false;
   1322   }
   1323 
   1324  protected:
   1325   const char* clr_normal;
   1326   const char* clr_flag_name;
   1327   const char* clr_flag_value;
   1328   const char* clr_reg_name;
   1329   const char* clr_reg_value;
   1330   const char* clr_vreg_name;
   1331   const char* clr_vreg_value;
   1332   const char* clr_memory_address;
   1333   const char* clr_warning;
   1334   const char* clr_warning_message;
   1335   const char* clr_printf;
   1336 
   1337   // Simulation helpers ------------------------------------
          // Evaluate 'cond' against the current N, Z, C and V flags. Both 'al' and
          // 'nv' always pass. Any other unlisted value reaches VIXL_UNREACHABLE().
   1338   bool ConditionPassed(Condition cond) {
   1339     switch (cond) {
   1340       case eq:
   1341         return Z();
   1342       case ne:
   1343         return !Z();
   1344       case hs:
   1345         return C();
   1346       case lo:
   1347         return !C();
   1348       case mi:
   1349         return N();
   1350       case pl:
   1351         return !N();
   1352       case vs:
   1353         return V();
   1354       case vc:
   1355         return !V();
   1356       case hi:
   1357         return C() && !Z();
   1358       case ls:
                // The exact negation of 'hi'.
   1359         return !(C() && !Z());
   1360       case ge:
   1361         return N() == V();
   1362       case lt:
   1363         return N() != V();
   1364       case gt:
   1365         return !Z() && (N() == V());
   1366       case le:
                // The exact negation of 'gt'.
   1367         return !(!Z() && (N() == V()));
   1368       case nv:
   1369         VIXL_FALLTHROUGH();
   1370       case al:
   1371         return true;
   1372       default:
   1373         VIXL_UNREACHABLE();
   1374         return false;
   1375     }
   1376   }
   1377 
   1378   bool ConditionPassed(Instr cond) {
            // Overload for a condition supplied as an Instr value; it is cast to
            // Condition and evaluated by the overload above.
   1379     return ConditionPassed(static_cast<Condition>(cond));
   1380   }
   1381 
   1382   bool ConditionFailed(Condition cond) {
            // The logical negation of ConditionPassed().
   1383     return !ConditionPassed(cond);
   1384   }
   1385 
   1386   void AddSubHelper(const Instruction* instr, int64_t op2);
   1387   int64_t AddWithCarry(unsigned reg_size,
   1388                        bool set_flags,
   1389                        int64_t src1,
   1390                        int64_t src2,
   1391                        int64_t carry_in = 0);
   1392   void LogicalHelper(const Instruction* instr, int64_t op2);
   1393   void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
   1394   void LoadStoreHelper(const Instruction* instr,
   1395                        int64_t offset,
   1396                        AddrMode addrmode);
   1397   void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
   1398   uintptr_t AddressModeHelper(unsigned addr_reg,
   1399                               int64_t offset,
   1400                               AddrMode addrmode);
   1401   void NEONLoadStoreMultiStructHelper(const Instruction* instr,
   1402                                       AddrMode addr_mode);
   1403   void NEONLoadStoreSingleStructHelper(const Instruction* instr,
   1404                                        AddrMode addr_mode);
   1405 
   1406   uint64_t AddressUntag(uint64_t address) {
            // Clear the bits of 'address' that are set in kAddressTagMask.
   1407     return address & ~kAddressTagMask;
   1408   }
   1409 
   1410   template <typename T>
   1411   T* AddressUntag(T* address) {
            // Pointer overload: untag the pointer's integer value, then convert the
            // result back to the original pointer type.
   1412     uintptr_t address_raw = reinterpret_cast<uintptr_t>(address);
   1413     return reinterpret_cast<T*>(AddressUntag(address_raw));
   1414   }
   1415 
   1416   int64_t ShiftOperand(unsigned reg_size,
   1417                        int64_t value,
   1418                        Shift shift_type,
   1419                        unsigned amount);
   1420   int64_t Rotate(unsigned reg_width,
   1421                  int64_t value,
   1422                  Shift shift_type,
   1423                  unsigned amount);
   1424   int64_t ExtendValue(unsigned reg_width,
   1425                       int64_t value,
   1426                       Extend extend_type,
   1427                       unsigned left_shift = 0);
   1428 
   1429   enum ReverseByteMode {
            // The mode argument of ReverseBytes().
            // NOTE(review): the names suggest the width, in bits, of the units whose
            // bytes are reversed - confirm against the ReverseBytes() definition.
   1430     Reverse16 = 0,
   1431     Reverse32 = 1,
   1432     Reverse64 = 2
   1433   };
   1434   uint64_t ReverseBytes(uint64_t value, ReverseByteMode mode);
   1435   uint64_t ReverseBits(uint64_t value, unsigned num_bits);
   1436   uint16_t PolynomialMult(uint8_t op1, uint8_t op2);
   1437 
   1438   void ld1(VectorFormat vform,
   1439            LogicVRegister dst,
   1440            uint64_t addr);
   1441   void ld1(VectorFormat vform,
   1442            LogicVRegister dst,
   1443            int index,
   1444            uint64_t addr);
   1445   void ld1r(VectorFormat vform,
   1446             LogicVRegister dst,
   1447             uint64_t addr);
   1448   void ld2(VectorFormat vform,
   1449            LogicVRegister dst1,
   1450            LogicVRegister dst2,
   1451            uint64_t addr);
   1452   void ld2(VectorFormat vform,
   1453            LogicVRegister dst1,
   1454            LogicVRegister dst2,
   1455            int index,
   1456            uint64_t addr);
   1457   void ld2r(VectorFormat vform,
   1458            LogicVRegister dst1,
   1459            LogicVRegister dst2,
   1460            uint64_t addr);
   1461   void ld3(VectorFormat vform,
   1462            LogicVRegister dst1,
   1463            LogicVRegister dst2,
   1464            LogicVRegister dst3,
   1465            uint64_t addr);
   1466   void ld3(VectorFormat vform,
   1467            LogicVRegister dst1,
   1468            LogicVRegister dst2,
   1469            LogicVRegister dst3,
   1470            int index,
   1471            uint64_t addr);
   1472   void ld3r(VectorFormat vform,
   1473            LogicVRegister dst1,
   1474            LogicVRegister dst2,
   1475            LogicVRegister dst3,
   1476            uint64_t addr);
   1477   void ld4(VectorFormat vform,  // Four destination registers, no `index`. Presumably the de-interleaving LD4 form - confirm.
   1478            LogicVRegister dst1,
   1479            LogicVRegister dst2,
   1480            LogicVRegister dst3,
   1481            LogicVRegister dst4,
   1482            uint64_t addr);
   1483   void ld4(VectorFormat vform,  // Same destinations plus `index`; presumably the single-lane LD4 form - confirm.
   1484            LogicVRegister dst1,
   1485            LogicVRegister dst2,
   1486            LogicVRegister dst3,
   1487            LogicVRegister dst4,
   1488            int index,
   1489            uint64_t addr);
   1490   void ld4r(VectorFormat vform,  // Presumably LD4R (load-and-replicate) - confirm. NOTE(review): continuation lines sit one column left of the '('.
   1491            LogicVRegister dst1,
   1492            LogicVRegister dst2,
   1493            LogicVRegister dst3,
   1494            LogicVRegister dst4,
   1495            uint64_t addr);
   1496   void st1(VectorFormat vform,  // Overload without `index`: (vform, src, addr). Presumably stores the whole register - confirm.
   1497            LogicVRegister src,  // Source is taken by value, unlike the const-reference sources of the arithmetic helpers in this class.
   1498            uint64_t addr);      // Presumably the address to write to - confirm.
   1499   void st1(VectorFormat vform,  // Overload with `index`: presumably stores one lane only - confirm.
   1500            LogicVRegister src,
   1501            int index,
   1502            uint64_t addr);
   1503   void st2(VectorFormat vform,  // Two source registers, no `index`. Presumably the interleaving ST2 form - confirm.
   1504            LogicVRegister src,  // First source is named `src` (not `src1`); the second is `src2`.
   1505            LogicVRegister src2,
   1506            uint64_t addr);
   1507   void st2(VectorFormat vform,  // Same sources plus `index`; presumably the single-lane ST2 form - confirm.
   1508            LogicVRegister src,
   1509            LogicVRegister src2,
   1510            int index,
   1511            uint64_t addr);
   1512   void st3(VectorFormat vform,  // Three source registers, no `index`. Presumably the interleaving ST3 form - confirm.
   1513            LogicVRegister src,
   1514            LogicVRegister src2,
   1515            LogicVRegister src3,
   1516            uint64_t addr);
   1517   void st3(VectorFormat vform,  // Same sources plus `index`; presumably the single-lane ST3 form - confirm.
   1518            LogicVRegister src,
   1519            LogicVRegister src2,
   1520            LogicVRegister src3,
   1521            int index,
   1522            uint64_t addr);
   1523   void st4(VectorFormat vform,  // Four source registers, no `index`. Presumably the interleaving ST4 form - confirm.
   1524            LogicVRegister src,
   1525            LogicVRegister src2,
   1526            LogicVRegister src3,
   1527            LogicVRegister src4,
   1528            uint64_t addr);
   1529   void st4(VectorFormat vform,  // Same sources plus `index`; presumably the single-lane ST4 form - confirm.
   1530            LogicVRegister src,
   1531            LogicVRegister src2,
   1532            LogicVRegister src3,
   1533            LogicVRegister src4,
   1534            int index,
   1535            uint64_t addr);
   1536   LogicVRegister cmp(VectorFormat vform,  // Register/register overload. Returns a LogicVRegister by value (presumably dst - confirm).
   1537                      LogicVRegister dst,
   1538                      const LogicVRegister& src1,
   1539                      const LogicVRegister& src2,
   1540                      Condition cond);  // Presumably the relation tested between src1 and src2 lanes - confirm.
   1541   LogicVRegister cmp(VectorFormat vform,  // Register/immediate overload: second operand is the int `imm`.
   1542                      LogicVRegister dst,
   1543                      const LogicVRegister& src1,
   1544                      int imm,
   1545                      Condition cond);
   1546   LogicVRegister cmptst(VectorFormat vform,  // Two-source helper with no Condition argument; presumably models CMTST (bitwise test) - confirm.
   1547                         LogicVRegister dst,
   1548                         const LogicVRegister& src1,
   1549                         const LogicVRegister& src2);
   1550   LogicVRegister add(VectorFormat vform,  // Two-source helper; presumably lane-wise addition - confirm.
   1551                      LogicVRegister dst,
   1552                      const LogicVRegister& src1,
   1553                      const LogicVRegister& src2);
   1554   LogicVRegister addp(VectorFormat vform,  // Two-source overload of addp (a single-source overload is declared later in this class). Presumably pairwise add - confirm.
   1555                       LogicVRegister dst,
   1556                       const LogicVRegister& src1,
   1557                       const LogicVRegister& src2);
   1558   LogicVRegister mla(VectorFormat vform,  // Vector overload (no `index`); an overload taking `index` is declared further down. Presumably multiply-accumulate into dst - confirm.
   1559                      LogicVRegister dst,
   1560                      const LogicVRegister& src1,
   1561                      const LogicVRegister& src2);
   1562   LogicVRegister mls(VectorFormat vform,  // Vector overload (no `index`). Presumably multiply-subtract from dst - confirm.
   1563                      LogicVRegister dst,
   1564                      const LogicVRegister& src1,
   1565                      const LogicVRegister& src2);
   1566   LogicVRegister mul(VectorFormat vform,  // Vector overload (no `index`). Presumably lane-wise multiply - confirm.
   1567                      LogicVRegister dst,
   1568                      const LogicVRegister& src1,
   1569                      const LogicVRegister& src2);
   1570   LogicVRegister mul(VectorFormat vform,  // Overload with `index`; parameter list matches the ByElementOp typedef declared below.
   1571                      LogicVRegister dst,
   1572                      const LogicVRegister& src1,
   1573                      const LogicVRegister& src2,
   1574                      int index);  // Presumably selects the src2 lane used as the common multiplier - confirm.
   1575   LogicVRegister mla(VectorFormat vform,  // Overload with `index` of the mla declared above.
   1576                      LogicVRegister dst,
   1577                      const LogicVRegister& src1,
   1578                      const LogicVRegister& src2,
   1579                      int index);
   1580   LogicVRegister mls(VectorFormat vform,  // Overload with `index` of the mls declared above.
   1581                      LogicVRegister dst,
   1582                      const LogicVRegister& src1,
   1583                      const LogicVRegister& src2,
   1584                      int index);
   1585   LogicVRegister pmul(VectorFormat vform,  // Two-source helper; presumably the polynomial multiply built on PolynomialMult() - confirm.
   1586                       LogicVRegister dst,
   1587                       const LogicVRegister& src1,
   1588                       const LogicVRegister& src2);
   1589 
   1590   typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,  // Pointer-to-member-function type on Simulator.
   1591                                                    LogicVRegister dst,
   1592                                                    const LogicVRegister& src1,
   1593                                                    const LogicVRegister& src2,
   1594                                                    int index);  // Same parameter list as the (vform, dst, src1, src2, index) members declared right after this typedef.
   1595   LogicVRegister fmul(VectorFormat vform,  // Signature matches ByElementOp. Presumably floating-point multiply by the src2 lane `index` - confirm.
   1596                       LogicVRegister dst,
   1597                       const LogicVRegister& src1,
   1598                       const LogicVRegister& src2,
   1599                       int index);
   1600   LogicVRegister fmla(VectorFormat vform,  // Signature matches ByElementOp. Presumably FP multiply-accumulate by element - confirm.
   1601                       LogicVRegister dst,
   1602                       const LogicVRegister& src1,
   1603                       const LogicVRegister& src2,
   1604                       int index);
   1605   LogicVRegister fmls(VectorFormat vform,  // Signature matches ByElementOp. Presumably FP multiply-subtract by element - confirm.
   1606                       LogicVRegister dst,
   1607                       const LogicVRegister& src1,
   1608                       const LogicVRegister& src2,
   1609                       int index);
   1610   LogicVRegister fmulx(VectorFormat vform,  // Signature matches ByElementOp. Presumably FMULX (multiply extended) by element - confirm.
   1611                        LogicVRegister dst,
   1612                        const LogicVRegister& src1,
   1613                        const LogicVRegister& src2,
   1614                        int index);
   1615   LogicVRegister smull(VectorFormat vform,  // Signature matches ByElementOp. Presumably signed widening multiply by element - confirm.
   1616                        LogicVRegister dst,
   1617                        const LogicVRegister& src1,
   1618                        const LogicVRegister& src2,
   1619                        int index);
   1620   LogicVRegister smull2(VectorFormat vform,  // "2" variant; presumably operates on the upper half of the source - confirm.
   1621                         LogicVRegister dst,
   1622                         const LogicVRegister& src1,
   1623                         const LogicVRegister& src2,
   1624                         int index);
   1625   LogicVRegister umull(VectorFormat vform,  // Presumably the unsigned counterpart of smull - confirm.
   1626                        LogicVRegister dst,
   1627                        const LogicVRegister& src1,
   1628                        const LogicVRegister& src2,
   1629                        int index);
   1630   LogicVRegister umull2(VectorFormat vform,  // Presumably the unsigned counterpart of smull2 - confirm.
   1631                         LogicVRegister dst,
   1632                         const LogicVRegister& src1,
   1633                         const LogicVRegister& src2,
   1634                         int index);
   1635   LogicVRegister smlal(VectorFormat vform,  // Signature matches ByElementOp. Presumably signed widening multiply-accumulate by element - confirm.
   1636                        LogicVRegister dst,
   1637                        const LogicVRegister& src1,
   1638                        const LogicVRegister& src2,
   1639                        int index);
   1640   LogicVRegister smlal2(VectorFormat vform,  // "2" variant; presumably operates on the upper half of the source - confirm.
   1641                         LogicVRegister dst,
   1642                         const LogicVRegister& src1,
   1643                         const LogicVRegister& src2,
   1644                         int index);
   1645   LogicVRegister umlal(VectorFormat vform,  // Presumably the unsigned counterpart of smlal - confirm.
   1646                        LogicVRegister dst,
   1647                        const LogicVRegister& src1,
   1648                        const LogicVRegister& src2,
   1649                        int index);
   1650   LogicVRegister umlal2(VectorFormat vform,  // Presumably the unsigned counterpart of smlal2 - confirm.
   1651                         LogicVRegister dst,
   1652                         const LogicVRegister& src1,
   1653                         const LogicVRegister& src2,
   1654                         int index);
   1655   LogicVRegister smlsl(VectorFormat vform,  // Signature matches ByElementOp. Presumably signed widening multiply-subtract by element - confirm.
   1656                        LogicVRegister dst,
   1657                        const LogicVRegister& src1,
   1658                        const LogicVRegister& src2,
   1659                        int index);
   1660   LogicVRegister smlsl2(VectorFormat vform,  // "2" variant; presumably operates on the upper half of the source - confirm.
   1661                         LogicVRegister dst,
   1662                         const LogicVRegister& src1,
   1663                         const LogicVRegister& src2,
   1664                         int index);
   1665   LogicVRegister umlsl(VectorFormat vform,  // Presumably the unsigned counterpart of smlsl - confirm.
   1666                        LogicVRegister dst,
   1667                        const LogicVRegister& src1,
   1668                        const LogicVRegister& src2,
   1669                        int index);
   1670   LogicVRegister umlsl2(VectorFormat vform,  // Presumably the unsigned counterpart of smlsl2 - confirm.
   1671                         LogicVRegister dst,
   1672                         const LogicVRegister& src1,
   1673                         const LogicVRegister& src2,
   1674                         int index);
   1675   LogicVRegister sqdmull(VectorFormat vform,  // Signature matches ByElementOp. Presumably signed saturating doubling widening multiply by element - confirm.
   1676                          LogicVRegister dst,
   1677                          const LogicVRegister& src1,
   1678                          const LogicVRegister& src2,
   1679                          int index);
   1680   LogicVRegister sqdmull2(VectorFormat vform,  // "2" variant; presumably operates on the upper half of the source - confirm.
   1681                           LogicVRegister dst,
   1682                           const LogicVRegister& src1,
   1683                           const LogicVRegister& src2,
   1684                           int index);
   1685   LogicVRegister sqdmlal(VectorFormat vform,  // Presumably the accumulating form of sqdmull - confirm.
   1686                          LogicVRegister dst,
   1687                          const LogicVRegister& src1,
   1688                          const LogicVRegister& src2,
   1689                          int index);
   1690   LogicVRegister sqdmlal2(VectorFormat vform,  // Presumably the accumulating form of sqdmull2 - confirm.
   1691                           LogicVRegister dst,
   1692                           const LogicVRegister& src1,
   1693                           const LogicVRegister& src2,
   1694                           int index);
   1695   LogicVRegister sqdmlsl(VectorFormat vform,  // Presumably the subtracting form of sqdmull - confirm.
   1696                          LogicVRegister dst,
   1697                          const LogicVRegister& src1,
   1698                          const LogicVRegister& src2,
   1699                          int index);
   1700   LogicVRegister sqdmlsl2(VectorFormat vform,  // Presumably the subtracting form of sqdmull2 - confirm.
   1701                           LogicVRegister dst,
   1702                           const LogicVRegister& src1,
   1703                           const LogicVRegister& src2,
   1704                           int index);
   1705   LogicVRegister sqdmulh(VectorFormat vform,  // Signature matches ByElementOp. Presumably saturating doubling multiply returning the high half, by element - confirm.
   1706                          LogicVRegister dst,
   1707                          const LogicVRegister& src1,
   1708                          const LogicVRegister& src2,
   1709                          int index);
   1710   LogicVRegister sqrdmulh(VectorFormat vform,  // Presumably the rounding variant of sqdmulh - confirm.
   1711                           LogicVRegister dst,
   1712                           const LogicVRegister& src1,
   1713                           const LogicVRegister& src2,
   1714                           int index);
   1715   LogicVRegister sub(VectorFormat vform,  // Two-source helper; presumably lane-wise src1 - src2 - confirm operand order in the definition.
   1716                      LogicVRegister dst,
   1717                      const LogicVRegister& src1,
   1718                      const LogicVRegister& src2);
   1719   LogicVRegister and_(VectorFormat vform,  // Trailing underscore in the name: `and` is an alternative operator token in C++. Presumably bitwise AND - confirm.
   1720                       LogicVRegister dst,
   1721                       const LogicVRegister& src1,
   1722                       const LogicVRegister& src2);
   1723   LogicVRegister orr(VectorFormat vform,  // Register/register overload (an immediate overload is declared later in this class). Presumably bitwise OR - confirm.
   1724                      LogicVRegister dst,
   1725                      const LogicVRegister& src1,
   1726                      const LogicVRegister& src2);
   1727   LogicVRegister orn(VectorFormat vform,  // Presumably bitwise OR with the complement of src2 - confirm.
   1728                      LogicVRegister dst,
   1729                      const LogicVRegister& src1,
   1730                      const LogicVRegister& src2);
   1731   LogicVRegister eor(VectorFormat vform,  // Presumably bitwise exclusive OR - confirm.
   1732                      LogicVRegister dst,
   1733                      const LogicVRegister& src1,
   1734                      const LogicVRegister& src2);
   1735   LogicVRegister bic(VectorFormat vform,  // Register/register overload. Presumably bit clear (src1 AND NOT src2) - confirm.
   1736                      LogicVRegister dst,
   1737                      const LogicVRegister& src1,
   1738                      const LogicVRegister& src2);
   1739   LogicVRegister bic(VectorFormat vform,  // Register/immediate overload: second operand is the 64-bit `imm`.
   1740                      LogicVRegister dst,
   1741                      const LogicVRegister& src,
   1742                      uint64_t imm);
   1743   LogicVRegister bif(VectorFormat vform,  // Presumably "bitwise insert if false" - confirm which operand acts as the mask.
   1744                      LogicVRegister dst,
   1745                      const LogicVRegister& src1,
   1746                      const LogicVRegister& src2);
   1747   LogicVRegister bit(VectorFormat vform,  // Presumably "bitwise insert if true" - confirm which operand acts as the mask.
   1748                      LogicVRegister dst,
   1749                      const LogicVRegister& src1,
   1750                      const LogicVRegister& src2);
   1751   LogicVRegister bsl(VectorFormat vform,  // Presumably "bitwise select" with dst's prior contents as the selector - confirm.
   1752                      LogicVRegister dst,
   1753                      const LogicVRegister& src1,
   1754                      const LogicVRegister& src2);
   1755   LogicVRegister cls(VectorFormat vform,  // Single-source helper. Presumably per-lane count of leading sign bits - confirm.
   1756                      LogicVRegister dst,
   1757                      const LogicVRegister& src);
   1758   LogicVRegister clz(VectorFormat vform,  // Presumably per-lane count of leading zero bits - confirm.
   1759                      LogicVRegister dst,
   1760                      const LogicVRegister& src);
   1761   LogicVRegister cnt(VectorFormat vform,  // Presumably per-lane population count - confirm.
   1762                      LogicVRegister dst,
   1763                      const LogicVRegister& src);
   1764   LogicVRegister not_(VectorFormat vform,  // Trailing underscore in the name: `not` is an alternative operator token in C++. Presumably bitwise NOT - confirm.
   1765                       LogicVRegister dst,
   1766                       const LogicVRegister& src);
   1767   LogicVRegister rbit(VectorFormat vform,  // Presumably per-lane bit reversal - confirm.
   1768                       LogicVRegister dst,
   1769                       const LogicVRegister& src);
   1770   LogicVRegister rev(VectorFormat vform,  // Generic form taking an explicit `revSize`; presumably shared by rev16/rev32/rev64 below - confirm.
   1771                      LogicVRegister dst,
   1772                      const LogicVRegister& src,
   1773                      int revSize);  // Units (bytes vs. bits) are not visible from this header - TODO confirm.
   1774   LogicVRegister rev16(VectorFormat vform,  // Presumably reverses elements within each 16-bit container - confirm.
   1775                        LogicVRegister dst,
   1776                        const LogicVRegister& src);
   1777   LogicVRegister rev32(VectorFormat vform,  // Presumably reverses elements within each 32-bit container - confirm.
   1778                        LogicVRegister dst,
   1779                        const LogicVRegister& src);
   1780   LogicVRegister rev64(VectorFormat vform,  // Presumably reverses elements within each 64-bit container - confirm.
   1781                        LogicVRegister dst,
   1782                        const LogicVRegister& src);
   1783   LogicVRegister addlp(VectorFormat vform,  // Generic form with two bool switches; presumably the shared core of the four wrappers below - confirm.
   1784                        LogicVRegister dst,
   1785                        const LogicVRegister& src,
   1786                        bool is_signed,       // Presumably: true = signed extension, false = unsigned - confirm.
   1787                        bool do_accumulate);  // Presumably: true = add the result into dst rather than overwrite - confirm.
   1788   LogicVRegister saddlp(VectorFormat vform,  // Presumably addlp with is_signed=true, do_accumulate=false - confirm.
   1789                         LogicVRegister dst,
   1790                         const LogicVRegister& src);
   1791   LogicVRegister uaddlp(VectorFormat vform,  // Presumably addlp with is_signed=false, do_accumulate=false - confirm.
   1792                         LogicVRegister dst,
   1793                         const LogicVRegister& src);
   1794   LogicVRegister sadalp(VectorFormat vform,  // Presumably addlp with is_signed=true, do_accumulate=true - confirm.
   1795                         LogicVRegister dst,
   1796                         const LogicVRegister& src);
   1797   LogicVRegister uadalp(VectorFormat vform,  // Presumably addlp with is_signed=false, do_accumulate=true - confirm.
   1798                         LogicVRegister dst,
   1799                         const LogicVRegister& src);
   1800   LogicVRegister ext(VectorFormat vform,  // Two sources plus `index`. Presumably EXT (extract a vector from the src1:src2 pair starting at `index`) - confirm.
   1801                      LogicVRegister dst,
   1802                      const LogicVRegister& src1,
   1803                      const LogicVRegister& src2,
   1804                      int index);
   1805   LogicVRegister ins_element(VectorFormat vform,  // Presumably copies lane `src_index` of src into lane `dst_index` of dst - confirm.
   1806                              LogicVRegister dst,
   1807                              int dst_index,
   1808                              const LogicVRegister& src,
   1809                              int src_index);
   1810   LogicVRegister ins_immediate(VectorFormat vform,  // Presumably writes the 64-bit `imm` into lane `dst_index` of dst - confirm.
   1811                                LogicVRegister dst,
   1812                                int dst_index,
   1813                                uint64_t imm);
   1814   LogicVRegister dup_element(VectorFormat vform,  // Presumably broadcasts lane `src_index` of src to every lane of dst - confirm.
   1815                              LogicVRegister dst,
   1816                              const LogicVRegister& src,
   1817                              int src_index);
   1818   LogicVRegister dup_immediate(VectorFormat vform,  // Presumably broadcasts `imm` to every lane of dst - confirm.
   1819                                LogicVRegister dst,
   1820                                uint64_t imm);
   1821   LogicVRegister movi(VectorFormat vform,  // Immediate-only helper (no source register). Presumably MOVI - confirm.
   1822                       LogicVRegister dst,
   1823                       uint64_t imm);
   1824   LogicVRegister mvni(VectorFormat vform,  // Immediate-only helper. Presumably MVNI (move inverted immediate) - confirm.
   1825                       LogicVRegister dst,
   1826                       uint64_t imm);
   1827   LogicVRegister orr(VectorFormat vform,  // Register/immediate overload of orr; the register/register overload is declared earlier in this class.
   1828                      LogicVRegister dst,
   1829                      const LogicVRegister& src,
   1830                      uint64_t imm);
   1831   LogicVRegister sshl(VectorFormat vform,  // Shift amount comes from a register (src2), not an int. Presumably signed shift left by register - confirm.
   1832                       LogicVRegister dst,
   1833                       const LogicVRegister& src1,
   1834                       const LogicVRegister& src2);
   1835   LogicVRegister ushl(VectorFormat vform,  // Presumably the unsigned counterpart of sshl - confirm.
   1836                       LogicVRegister dst,
   1837                       const LogicVRegister& src1,
   1838                       const LogicVRegister& src2);
   1839   LogicVRegister sminmax(VectorFormat vform,  // Generic signed min/max; presumably the shared core of smax/smin below - confirm.
   1840                          LogicVRegister dst,
   1841                          const LogicVRegister& src1,
   1842                          const LogicVRegister& src2,
   1843                          bool max);  // Presumably: true = maximum, false = minimum - confirm.
   1844   LogicVRegister smax(VectorFormat vform,  // Presumably sminmax with max=true - confirm. NOTE(review): continuation lines sit one column left of the '('.
   1845                      LogicVRegister dst,
   1846                      const LogicVRegister& src1,
   1847                      const LogicVRegister& src2);
   1848   LogicVRegister smin(VectorFormat vform,  // Presumably sminmax with max=false - confirm. NOTE(review): same one-column misalignment as smax.
   1849                      LogicVRegister dst,
   1850                      const LogicVRegister& src1,
   1851                      const LogicVRegister& src2);
   1852   LogicVRegister sminmaxp(VectorFormat vform,  // Single source plus `dst_index`, unlike the two-source smaxp/sminp below; presumably their pairwise helper - confirm.
   1853                           LogicVRegister dst,
   1854                           int dst_index,  // Presumably the first dst lane this call writes - confirm.
   1855                           const LogicVRegister& src,
   1856                           bool max);      // Presumably: true = maximum, false = minimum - confirm.
   1857   LogicVRegister smaxp(VectorFormat vform,  // Presumably signed pairwise maximum over src1 and src2 - confirm.
   1858                        LogicVRegister dst,
   1859                        const LogicVRegister& src1,
   1860                        const LogicVRegister& src2);
   1861   LogicVRegister sminp(VectorFormat vform,  // Presumably signed pairwise minimum over src1 and src2 - confirm.
   1862                        LogicVRegister dst,
   1863                        const LogicVRegister& src1,
   1864                        const LogicVRegister& src2);
   1865   LogicVRegister addp(VectorFormat vform,  // Single-source overload of addp; the two-source overload is declared earlier in this class.
   1866                       LogicVRegister dst,
   1867                       const LogicVRegister& src);
   1868   LogicVRegister addv(VectorFormat vform,  // Presumably an across-lanes sum of src - confirm.
   1869                       LogicVRegister dst,
   1870                       const LogicVRegister& src);
   1871   LogicVRegister uaddlv(VectorFormat vform,  // Presumably an unsigned widening across-lanes sum - confirm.
   1872                         LogicVRegister dst,
   1873                         const LogicVRegister& src);
   1874   LogicVRegister saddlv(VectorFormat vform,  // Presumably a signed widening across-lanes sum - confirm.
   1875                         LogicVRegister dst,
   1876                         const LogicVRegister& src);
   1877   LogicVRegister sminmaxv(VectorFormat vform,  // Generic signed across-lanes min/max; presumably the shared core of smaxv/sminv below - confirm.
   1878                           LogicVRegister dst,
   1879                           const LogicVRegister& src,
   1880                           bool max);  // Presumably: true = maximum, false = minimum - confirm.
   1881   LogicVRegister smaxv(VectorFormat vform,  // Presumably sminmaxv with max=true - confirm.
   1882                        LogicVRegister dst,
   1883                        const LogicVRegister& src);
   1884   LogicVRegister sminv(VectorFormat vform,  // Presumably sminmaxv with max=false - confirm.
   1885                        LogicVRegister dst,
   1886                        const LogicVRegister& src);
   1887   LogicVRegister uxtl(VectorFormat vform,  // Single-source helper. Presumably unsigned (zero) extension of each lane to double width - confirm.
   1888                       LogicVRegister dst,
   1889                       const LogicVRegister& src);
   1890   LogicVRegister uxtl2(VectorFormat vform,  // "2" variant; presumably reads the upper half of src - confirm.
   1891                        LogicVRegister dst,
   1892                        const LogicVRegister& src);
   1893   LogicVRegister sxtl(VectorFormat vform,  // Presumably the sign-extending counterpart of uxtl - confirm.
   1894                       LogicVRegister dst,
   1895                       const LogicVRegister& src);
   1896   LogicVRegister sxtl2(VectorFormat vform,  // Presumably the sign-extending counterpart of uxtl2 - confirm.
   1897                        LogicVRegister dst,
   1898                        const LogicVRegister& src);
   1899   LogicVRegister tbl(VectorFormat vform,  // One-table overload. Presumably a table lookup of `tab` using the indices in `ind` - confirm.
   1900                      LogicVRegister dst,
   1901                      const LogicVRegister& tab,
   1902                      const LogicVRegister& ind);
   1903   LogicVRegister tbl(VectorFormat vform,  // Two-table overload (tab, tab2).
   1904                      LogicVRegister dst,
   1905                      const LogicVRegister& tab,
   1906                      const LogicVRegister& tab2,
   1907                      const LogicVRegister& ind);
   1908   LogicVRegister tbl(VectorFormat vform,  // Three-table overload (tab, tab2, tab3).
   1909                      LogicVRegister dst,
   1910                      const LogicVRegister& tab,
   1911                      const LogicVRegister& tab2,
   1912                      const LogicVRegister& tab3,
   1913                      const LogicVRegister& ind);
   1914   LogicVRegister tbl(VectorFormat vform,  // Four-table overload (tab, tab2, tab3, tab4).
   1915                      LogicVRegister dst,
   1916                      const LogicVRegister& tab,
   1917                      const LogicVRegister& tab2,
   1918                      const LogicVRegister& tab3,
   1919                      const LogicVRegister& tab4,
   1920                      const LogicVRegister& ind);
   1921   LogicVRegister tbx(VectorFormat vform,  // One-table overload; same parameter lists as tbl. Presumably TBX (lookup that leaves dst lanes untouched for out-of-range indices) - confirm.
   1922                      LogicVRegister dst,
   1923                      const LogicVRegister& tab,
   1924                      const LogicVRegister& ind);
   1925   LogicVRegister tbx(VectorFormat vform,  // Two-table overload (tab, tab2).
   1926                      LogicVRegister dst,
   1927                      const LogicVRegister& tab,
   1928                      const LogicVRegister& tab2,
   1929                      const LogicVRegister& ind);
   1930   LogicVRegister tbx(VectorFormat vform,  // Three-table overload (tab, tab2, tab3).
   1931                      LogicVRegister dst,
   1932                      const LogicVRegister& tab,
   1933                      const LogicVRegister& tab2,
   1934                      const LogicVRegister& tab3,
   1935                      const LogicVRegister& ind);
   1936   LogicVRegister tbx(VectorFormat vform,  // Four-table overload (tab, tab2, tab3, tab4).
   1937                      LogicVRegister dst,
   1938                      const LogicVRegister& tab,
   1939                      const LogicVRegister& tab2,
   1940                      const LogicVRegister& tab3,
   1941                      const LogicVRegister& tab4,
   1942                      const LogicVRegister& ind);
   1943   LogicVRegister uaddl(VectorFormat vform,  // Presumably unsigned add long (both sources widened) - confirm.
   1944                        LogicVRegister dst,
   1945                        const LogicVRegister& src1,
   1946                        const LogicVRegister& src2);
   1947   LogicVRegister uaddl2(VectorFormat vform,  // "2" variant; presumably reads the upper halves of the sources - confirm.
   1948                         LogicVRegister dst,
   1949                         const LogicVRegister& src1,
   1950                         const LogicVRegister& src2);
   1951   LogicVRegister uaddw(VectorFormat vform,  // Presumably unsigned add wide (only src2 widened) - confirm.
   1952                        LogicVRegister dst,
   1953                        const LogicVRegister& src1,
   1954                        const LogicVRegister& src2);
   1955   LogicVRegister uaddw2(VectorFormat vform,  // "2" variant of uaddw; presumably reads the upper half of src2 - confirm.
   1956                         LogicVRegister dst,
   1957                         const LogicVRegister& src1,
   1958                         const LogicVRegister& src2);
   1959   LogicVRegister saddl(VectorFormat vform,  // Presumably signed add long (both sources widened) - confirm.
   1960                        LogicVRegister dst,
   1961                        const LogicVRegister& src1,
   1962                        const LogicVRegister& src2);
   1963   LogicVRegister saddl2(VectorFormat vform,  // "2" variant; presumably reads the upper halves of the sources - confirm.
   1964                         LogicVRegister dst,
   1965                         const LogicVRegister& src1,
   1966                         const LogicVRegister& src2);
   1967   LogicVRegister saddw(VectorFormat vform,  // Presumably signed add wide (only src2 widened) - confirm.
   1968                        LogicVRegister dst,
   1969                        const LogicVRegister& src1,
   1970                        const LogicVRegister& src2);
   1971   LogicVRegister saddw2(VectorFormat vform,  // "2" variant of saddw; presumably reads the upper half of src2 - confirm.
   1972                         LogicVRegister dst,
   1973                         const LogicVRegister& src1,
   1974                         const LogicVRegister& src2);
   1975   LogicVRegister usubl(VectorFormat vform,  // Presumably unsigned subtract long - confirm. NOTE(review): continuation lines are indented two columns past the '('.
   1976                          LogicVRegister dst,
   1977                          const LogicVRegister& src1,
   1978                          const LogicVRegister& src2);
   1979   LogicVRegister usubl2(VectorFormat vform,  // "2" variant; presumably reads the upper halves of the sources - confirm.
   1980                         LogicVRegister dst,
   1981                         const LogicVRegister& src1,
   1982                         const LogicVRegister& src2);
   1983   LogicVRegister usubw(VectorFormat vform,  // Presumably unsigned subtract wide (only src2 widened) - confirm.
   1984                        LogicVRegister dst,
   1985                        const LogicVRegister& src1,
   1986                        const LogicVRegister& src2);
   1987   LogicVRegister usubw2(VectorFormat vform,  // "2" variant of usubw; presumably reads the upper half of src2 - confirm.
   1988                         LogicVRegister dst,
   1989                         const LogicVRegister& src1,
   1990                         const LogicVRegister& src2);
   1991   LogicVRegister ssubl(VectorFormat vform,  // Presumably signed subtract long (both sources widened) - confirm.
   1992                        LogicVRegister dst,
   1993                        const LogicVRegister& src1,
   1994                        const LogicVRegister& src2);
   1995   LogicVRegister ssubl2(VectorFormat vform,  // "2" variant; presumably reads the upper halves of the sources - confirm.
   1996                         LogicVRegister dst,
   1997                         const LogicVRegister& src1,
   1998                         const LogicVRegister& src2);
   1999   LogicVRegister ssubw(VectorFormat vform,  // Presumably signed subtract wide (only src2 widened) - confirm.
   2000                        LogicVRegister dst,
   2001                        const LogicVRegister& src1,
   2002                        const LogicVRegister& src2);
   2003   LogicVRegister ssubw2(VectorFormat vform,  // "2" variant of ssubw; presumably reads the upper half of src2 - confirm.
   2004                         LogicVRegister dst,
   2005                         const LogicVRegister& src1,
   2006                         const LogicVRegister& src2);
   2007   LogicVRegister uminmax(VectorFormat vform,  // Generic unsigned min/max; same parameter list as sminmax. Presumably the shared core of umax/umin below - confirm.
   2008                          LogicVRegister dst,
   2009                          const LogicVRegister& src1,
   2010                          const LogicVRegister& src2,
   2011                          bool max);  // Presumably: true = maximum, false = minimum - confirm.
   2012   LogicVRegister umax(VectorFormat vform,  // Presumably uminmax with max=true - confirm. NOTE(review): continuation lines sit one column left of the '('.
   2013                      LogicVRegister dst,
   2014                      const LogicVRegister& src1,
   2015                      const LogicVRegister& src2);
   2016   LogicVRegister umin(VectorFormat vform,  // Presumably uminmax with max=false - confirm. NOTE(review): same one-column misalignment as umax.
   2017                      LogicVRegister dst,
   2018                      const LogicVRegister& src1,
   2019                      const LogicVRegister& src2);
   2020   LogicVRegister uminmaxp(VectorFormat vform,  // Single source plus `dst_index`; same parameter list as sminmaxp. Presumably the pairwise helper for umaxp/uminp - confirm.
   2021                           LogicVRegister dst,
   2022                           int dst_index,  // Presumably the first dst lane this call writes - confirm.
   2023                           const LogicVRegister& src,
   2024                           bool max);      // Presumably: true = maximum, false = minimum - confirm.
   2025   LogicVRegister umaxp(VectorFormat vform,  // Presumably unsigned pairwise maximum over src1 and src2 - confirm.
   2026                        LogicVRegister dst,
   2027                        const LogicVRegister& src1,
   2028                        const LogicVRegister& src2);
   2029   LogicVRegister uminp(VectorFormat vform,  // Presumably unsigned pairwise minimum over src1 and src2 - confirm.
   2030                        LogicVRegister dst,
   2031                        const LogicVRegister& src1,
   2032                        const LogicVRegister& src2);
   2033   LogicVRegister uminmaxv(VectorFormat vform,  // Generic unsigned across-lanes min/max; presumably the shared core of umaxv/uminv below - confirm.
   2034                           LogicVRegister dst,
   2035                           const LogicVRegister& src,
   2036                           bool max);  // Presumably: true = maximum, false = minimum - confirm.
   2037   LogicVRegister umaxv(VectorFormat vform,  // Presumably uminmaxv with max=true - confirm.
   2038                        LogicVRegister dst,
   2039                        const LogicVRegister& src);
   2040   LogicVRegister uminv(VectorFormat vform,  // Presumably uminmaxv with max=false - confirm.
   2041                        LogicVRegister dst,
   2042                        const LogicVRegister& src);
   2043   LogicVRegister trn1(VectorFormat vform,  // Two-source permute. Presumably TRN1 (transpose, even-numbered lanes) - confirm.
   2044                       LogicVRegister dst,
   2045                       const LogicVRegister& src1,
   2046                       const LogicVRegister& src2);
   2047   LogicVRegister trn2(VectorFormat vform,  // Presumably TRN2 (transpose, odd-numbered lanes) - confirm.
   2048                       LogicVRegister dst,
   2049                       const LogicVRegister& src1,
   2050                       const LogicVRegister& src2);
   2051   LogicVRegister zip1(VectorFormat vform,  // Presumably ZIP1 (interleave the lower halves of the sources) - confirm.
   2052                       LogicVRegister dst,
   2053                       const LogicVRegister& src1,
   2054                       const LogicVRegister& src2);
   2055   LogicVRegister zip2(VectorFormat vform,  // Presumably ZIP2 (interleave the upper halves of the sources) - confirm.
   2056                       LogicVRegister dst,
   2057                       const LogicVRegister& src1,
   2058                       const LogicVRegister& src2);
   2059   LogicVRegister uzp1(VectorFormat vform,  // Presumably UZP1 (de-interleave, even-numbered lanes) - confirm.
   2060                       LogicVRegister dst,
   2061                       const LogicVRegister& src1,
   2062                       const LogicVRegister& src2);
   2063   LogicVRegister uzp2(VectorFormat vform,  // Presumably UZP2 (de-interleave, odd-numbered lanes) - confirm.
   2064                       LogicVRegister dst,
   2065                       const LogicVRegister& src1,
   2066                       const LogicVRegister& src2);
   2067   LogicVRegister shl(VectorFormat vform,  // Shift amount is an int argument (contrast sshl/ushl, which take a register). Presumably lane-wise shift left - confirm.
   2068                      LogicVRegister dst,
   2069                      const LogicVRegister& src,
   2070                      int shift);
   2071   LogicVRegister scvtf(VectorFormat vform,  // Presumably signed integer/fixed-point to floating-point conversion per lane - confirm.
   2072                        LogicVRegister dst,
   2073                        const LogicVRegister& src,
   2074                        int fbits,                 // Presumably the number of fractional bits in the fixed-point input - confirm.
   2075                        FPRounding rounding_mode); // Rounding mode is supplied by the caller; how it is applied is not visible here.
   2076   LogicVRegister ucvtf(VectorFormat vform,  // Presumably the unsigned counterpart of scvtf - confirm.
   2077                        LogicVRegister dst,
   2078                        const LogicVRegister& src,
   2079                        int fbits,
   2080                        FPRounding rounding_mode);
   2081   LogicVRegister sshll(VectorFormat vform,  // Presumably signed shift left long (widen, then shift by `shift`) - confirm.
   2082                        LogicVRegister dst,
   2083                        const LogicVRegister& src,
   2084                        int shift);
   2085   LogicVRegister sshll2(VectorFormat vform,  // "2" variant; presumably reads the upper half of src - confirm.
   2086                         LogicVRegister dst,
   2087                         const LogicVRegister& src,
   2088                         int shift);
   2089   LogicVRegister shll(VectorFormat vform,  // No `shift` parameter, unlike sshll/ushll; presumably the shift amount is implied by vform - confirm.
   2090                       LogicVRegister dst,
   2091                       const LogicVRegister& src);
   2092   LogicVRegister shll2(VectorFormat vform,  // "2" variant of shll; presumably reads the upper half of src - confirm.
   2093                        LogicVRegister dst,
   2094                        const LogicVRegister& src);
   2095   LogicVRegister ushll(VectorFormat vform,  // Presumably the unsigned counterpart of sshll - confirm.
   2096                        LogicVRegister dst,
   2097                        const LogicVRegister& src,
   2098                        int shift);
   2099   LogicVRegister ushll2(VectorFormat vform,  // Presumably the unsigned counterpart of sshll2 - confirm.
   2100                         LogicVRegister dst,
   2101                         const LogicVRegister& src,
   2102                         int shift);
   2103   LogicVRegister sli(VectorFormat vform,
   2104                      LogicVRegister dst,
   2105                      const LogicVRegister& src,
   2106                      int shift);
   2107   LogicVRegister sri(VectorFormat vform,
   2108                      LogicVRegister dst,
   2109                      const LogicVRegister& src,
   2110                      int shift);
   2111   LogicVRegister sshr(VectorFormat vform,
   2112                       LogicVRegister dst,
   2113                       const LogicVRegister& src,
   2114                       int shift);
   2115   LogicVRegister ushr(VectorFormat vform,
   2116                       LogicVRegister dst,
   2117                       const LogicVRegister& src,
   2118                       int shift);
   2119   LogicVRegister ssra(VectorFormat vform,
   2120                       LogicVRegister dst,
   2121                       const LogicVRegister& src,
   2122                       int shift);
   2123   LogicVRegister usra(VectorFormat vform,
   2124                       LogicVRegister dst,
   2125                       const LogicVRegister& src,
   2126                       int shift);
   2127   LogicVRegister srsra(VectorFormat vform,
   2128                        LogicVRegister dst,
   2129                        const LogicVRegister& src,
   2130                        int shift);
   2131   LogicVRegister ursra(VectorFormat vform,
   2132                        LogicVRegister dst,
   2133                        const LogicVRegister& src,
   2134                        int shift);
   2135   LogicVRegister suqadd(VectorFormat vform,
   2136                        LogicVRegister dst,
   2137                        const LogicVRegister& src);
   2138   LogicVRegister usqadd(VectorFormat vform,
   2139                        LogicVRegister dst,
   2140                        const LogicVRegister& src);
   2141   LogicVRegister sqshl(VectorFormat vform,
   2142                        LogicVRegister dst,
   2143                        const LogicVRegister& src,
   2144                        int shift);
   2145   LogicVRegister uqshl(VectorFormat vform,
   2146                        LogicVRegister dst,
   2147                        const LogicVRegister& src,
   2148                        int shift);
   2149   LogicVRegister sqshlu(VectorFormat vform,
   2150                         LogicVRegister dst,
   2151                         const LogicVRegister& src,
   2152                         int shift);
   2153   LogicVRegister abs(VectorFormat vform,
   2154                      LogicVRegister dst,
   2155                      const LogicVRegister& src);
   2156   LogicVRegister neg(VectorFormat vform,
   2157                      LogicVRegister dst,
   2158                      const LogicVRegister& src);
   2159   LogicVRegister extractnarrow(VectorFormat vform,
   2160                                LogicVRegister dst,
   2161                                bool dstIsSigned,
   2162                                const LogicVRegister& src,
   2163                                bool srcIsSigned);
   2164   LogicVRegister xtn(VectorFormat vform,
   2165                      LogicVRegister dst,
   2166                      const LogicVRegister& src);
   2167   LogicVRegister sqxtn(VectorFormat vform,
   2168                        LogicVRegister dst,
   2169                        const LogicVRegister& src);
   2170   LogicVRegister uqxtn(VectorFormat vform,
   2171                        LogicVRegister dst,
   2172                        const LogicVRegister& src);
   2173   LogicVRegister sqxtun(VectorFormat vform,
   2174                         LogicVRegister dst,
   2175                         const LogicVRegister& src);
   2176   LogicVRegister absdiff(VectorFormat vform,
   2177                          LogicVRegister dst,
   2178                          const LogicVRegister& src1,
   2179                          const LogicVRegister& src2,
   2180                          bool issigned);
   2181   LogicVRegister saba(VectorFormat vform,
   2182                       LogicVRegister dst,
   2183                       const LogicVRegister& src1,
   2184                       const LogicVRegister& src2);
   2185   LogicVRegister uaba(VectorFormat vform,
   2186                       LogicVRegister dst,
   2187                       const LogicVRegister& src1,
   2188                       const LogicVRegister& src2);
   2189   LogicVRegister shrn(VectorFormat vform,
   2190                       LogicVRegister dst,
   2191                       const LogicVRegister& src,
   2192                       int shift);
   2193   LogicVRegister shrn2(VectorFormat vform,
   2194                       LogicVRegister dst,
   2195                       const LogicVRegister& src,
   2196                       int shift);
   2197   LogicVRegister rshrn(VectorFormat vform,
   2198                        LogicVRegister dst,
   2199                        const LogicVRegister& src,
   2200                        int shift);
   2201   LogicVRegister rshrn2(VectorFormat vform,
   2202                         LogicVRegister dst,
   2203                         const LogicVRegister& src,
   2204                         int shift);
   2205   LogicVRegister uqshrn(VectorFormat vform,
   2206                         LogicVRegister dst,
   2207                         const LogicVRegister& src,
   2208                         int shift);
   2209   LogicVRegister uqshrn2(VectorFormat vform,
   2210                          LogicVRegister dst,
   2211                          const LogicVRegister& src,
   2212                          int shift);
   2213   LogicVRegister uqrshrn(VectorFormat vform,
   2214                          LogicVRegister dst,
   2215                          const LogicVRegister& src,
   2216                          int shift);
   2217   LogicVRegister uqrshrn2(VectorFormat vform,
   2218                           LogicVRegister dst,
   2219                           const LogicVRegister& src,
   2220                           int shift);
   2221   LogicVRegister sqshrn(VectorFormat vform,
   2222                         LogicVRegister dst,
   2223                         const LogicVRegister& src,
   2224                         int shift);
   2225   LogicVRegister sqshrn2(VectorFormat vform,
   2226                          LogicVRegister dst,
   2227                          const LogicVRegister& src,
   2228                          int shift);
   2229   LogicVRegister sqrshrn(VectorFormat vform,
   2230                          LogicVRegister dst,
   2231                          const LogicVRegister& src,
   2232                          int shift);
   2233   LogicVRegister sqrshrn2(VectorFormat vform,
   2234                           LogicVRegister dst,
   2235                           const LogicVRegister& src,
   2236                           int shift);
   2237   LogicVRegister sqshrun(VectorFormat vform,
   2238                          LogicVRegister dst,
   2239                          const LogicVRegister& src,
   2240                          int shift);
   2241   LogicVRegister sqshrun2(VectorFormat vform,
   2242                           LogicVRegister dst,
   2243                           const LogicVRegister& src,
   2244                           int shift);
   2245   LogicVRegister sqrshrun(VectorFormat vform,
   2246                           LogicVRegister dst,
   2247                           const LogicVRegister& src,
   2248                           int shift);
   2249   LogicVRegister sqrshrun2(VectorFormat vform,
   2250                            LogicVRegister dst,
   2251                            const LogicVRegister& src,
   2252                            int shift);
   2253   LogicVRegister sqrdmulh(VectorFormat vform,
   2254                           LogicVRegister dst,
   2255                           const LogicVRegister& src1,
   2256                           const LogicVRegister& src2,
   2257                           bool round = true);
   2258   LogicVRegister sqdmulh(VectorFormat vform,
   2259                          LogicVRegister dst,
   2260                          const LogicVRegister& src1,
   2261                          const LogicVRegister& src2);
   2262   #define NEON_3VREG_LOGIC_LIST(V) \
   2263     V(addhn)                       \
   2264     V(addhn2)                      \
   2265     V(raddhn)                      \
   2266     V(raddhn2)                     \
   2267     V(subhn)                       \
   2268     V(subhn2)                      \
   2269     V(rsubhn)                      \
   2270     V(rsubhn2)                     \
   2271     V(pmull)                       \
   2272     V(pmull2)                      \
   2273     V(sabal)                       \
   2274     V(sabal2)                      \
   2275     V(uabal)                       \
   2276     V(uabal2)                      \
   2277     V(sabdl)                       \
   2278     V(sabdl2)                      \
   2279     V(uabdl)                       \
   2280     V(uabdl2)                      \
   2281     V(smull)                       \
   2282     V(smull2)                      \
   2283     V(umull)                       \
   2284     V(umull2)                      \
   2285     V(smlal)                       \
   2286     V(smlal2)                      \
   2287     V(umlal)                       \
   2288     V(umlal2)                      \
   2289     V(smlsl)                       \
   2290     V(smlsl2)                      \
   2291     V(umlsl)                       \
   2292     V(umlsl2)                      \
   2293     V(sqdmlal)                     \
   2294     V(sqdmlal2)                    \
   2295     V(sqdmlsl)                     \
   2296     V(sqdmlsl2)                    \
   2297     V(sqdmull)                     \
   2298     V(sqdmull2)
   2299 
          // NEON_3VREG_LOGIC_LIST applies V to each name listed above. Expanding it
          // with DEFINE_LOGIC_FUNC declares one member function per name, all with
          // the same signature: (vform, dst by value, src1 and src2 by const
          // reference), returning a LogicVRegister. DEFINE_LOGIC_FUNC is undefined
          // again straight after the expansion; the list macro is not undefined
          // here.
   2300   #define DEFINE_LOGIC_FUNC(FXN)                   \
   2301     LogicVRegister FXN(VectorFormat vform,         \
   2302                        LogicVRegister dst,         \
   2303                        const LogicVRegister& src1, \
   2304                        const LogicVRegister& src2);
   2305   NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
   2306   #undef DEFINE_LOGIC_FUNC
   2307 
   2308   #define NEON_FP3SAME_LIST(V)  \
   2309     V(fadd,   FPAdd,   false)   \
   2310     V(fsub,   FPSub,   true)    \
   2311     V(fmul,   FPMul,   true)    \
   2312     V(fmulx,  FPMulx,  true)    \
   2313     V(fdiv,   FPDiv,   true)    \
   2314     V(fmax,   FPMax,   false)   \
   2315     V(fmin,   FPMin,   false)   \
   2316     V(fmaxnm, FPMaxNM, false)   \
   2317     V(fminnm, FPMinNM, false)
   2318 
          // Each list entry above is a (FN, OP, PROCNAN) triple. The declaration
          // macro below only uses FN: for every entry it declares a
          // template <typename T> member and a non-template overload, both taking
          // (vform, dst, src1, src2). OP and PROCNAN are ignored by this macro.
          // NOTE(review): presumably OP and PROCNAN are consumed where these
          // functions are defined -- confirm in the implementation file.
   2319   #define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
   2320     template <typename T>                            \
   2321     LogicVRegister FN(VectorFormat vform,            \
   2322                       LogicVRegister dst,            \
   2323                       const LogicVRegister& src1,    \
   2324                       const LogicVRegister& src2);   \
   2325     LogicVRegister FN(VectorFormat vform,            \
   2326                       LogicVRegister dst,            \
   2327                       const LogicVRegister& src1,    \
   2328                       const LogicVRegister& src2);
   2329   NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
   2330   #undef DECLARE_NEON_FP_VECTOR_OP
   2331 
   2332   #define NEON_FPPAIRWISE_LIST(V)         \
   2333     V(faddp,   fadd,   FPAdd)             \
   2334     V(fmaxp,   fmax,   FPMax)             \
   2335     V(fmaxnmp, fmaxnm, FPMaxNM)           \
   2336     V(fminp,   fmin,   FPMin)             \
   2337     V(fminnmp, fminnm, FPMinNM)
   2338 
          // Each list entry above is a (FNP, FN, OP) triple. The declaration macro
          // below only uses FNP: for every entry it declares two overloads, one
          // taking two sources (vform, dst, src1, src2) and one taking a single
          // source (vform, dst, src). FN and OP are ignored by this macro.
          // NOTE(review): presumably FN and OP are consumed where these functions
          // are defined -- confirm in the implementation file.
   2339   #define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP)       \
   2340     LogicVRegister FNP(VectorFormat vform,           \
   2341                        LogicVRegister dst,           \
   2342                        const LogicVRegister& src1,   \
   2343                        const LogicVRegister& src2);  \
   2344     LogicVRegister FNP(VectorFormat vform,           \
   2345                        LogicVRegister dst,           \
   2346                        const LogicVRegister& src);
   2347   NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
   2348   #undef DECLARE_NEON_FP_PAIR_OP
   2349 
   2350   template <typename T>
   2351   LogicVRegister frecps(VectorFormat vform,
   2352                         LogicVRegister dst,
   2353                         const LogicVRegister& src1,
   2354                         const LogicVRegister& src2);
   2355   LogicVRegister frecps(VectorFormat vform,
   2356                         LogicVRegister dst,
   2357                         const LogicVRegister& src1,
   2358                         const LogicVRegister& src2);
   2359   template <typename T>
   2360   LogicVRegister frsqrts(VectorFormat vform,
   2361                          LogicVRegister dst,
   2362                          const LogicVRegister& src1,
   2363                          const LogicVRegister& src2);
   2364   LogicVRegister frsqrts(VectorFormat vform,
   2365                          LogicVRegister dst,
   2366                          const LogicVRegister& src1,
   2367                          const LogicVRegister& src2);
   2368   template <typename T>
   2369   LogicVRegister fmla(VectorFormat vform,
   2370                       LogicVRegister dst,
   2371                       const LogicVRegister& src1,
   2372                       const LogicVRegister& src2);
   2373   LogicVRegister fmla(VectorFormat vform,
   2374                       LogicVRegister dst,
   2375                       const LogicVRegister& src1,
   2376                       const LogicVRegister& src2);
   2377   template <typename T>
   2378   LogicVRegister fmls(VectorFormat vform,
   2379                       LogicVRegister dst,
   2380                       const LogicVRegister& src1,
   2381                       const LogicVRegister& src2);
   2382   LogicVRegister fmls(VectorFormat vform,
   2383                       LogicVRegister dst,
   2384                       const LogicVRegister& src1,
   2385                       const LogicVRegister& src2);
   2386   LogicVRegister fnmul(VectorFormat vform,
   2387                        LogicVRegister dst,
   2388                        const LogicVRegister& src1,
   2389                        const LogicVRegister& src2);
   2390 
   2391   template <typename T>
   2392   LogicVRegister fcmp(VectorFormat vform,
   2393                       LogicVRegister dst,
   2394                       const LogicVRegister& src1,
   2395                       const LogicVRegister& src2,
   2396                       Condition cond);
   2397   LogicVRegister fcmp(VectorFormat vform,
   2398                       LogicVRegister dst,
   2399                       const LogicVRegister& src1,
   2400                       const LogicVRegister& src2,
   2401                       Condition cond);
   2402   LogicVRegister fabscmp(VectorFormat vform,
   2403                          LogicVRegister dst,
   2404                          const LogicVRegister& src1,
   2405                          const LogicVRegister& src2,
   2406                          Condition cond);
   2407   LogicVRegister fcmp_zero(VectorFormat vform,
   2408                            LogicVRegister dst,
   2409                            const LogicVRegister& src,
   2410                            Condition cond);
   2411 
   2412   template <typename T>
   2413   LogicVRegister fneg(VectorFormat vform,
   2414                       LogicVRegister dst,
   2415                       const LogicVRegister& src);
   2416   LogicVRegister fneg(VectorFormat vform,
   2417                       LogicVRegister dst,
   2418                       const LogicVRegister& src);
   2419   template <typename T>
   2420   LogicVRegister frecpx(VectorFormat vform,
   2421                         LogicVRegister dst,
   2422                         const LogicVRegister& src);
   2423   LogicVRegister frecpx(VectorFormat vform,
   2424                         LogicVRegister dst,
   2425                         const LogicVRegister& src);
   2426   template <typename T>
   2427   LogicVRegister fabs_(VectorFormat vform,
   2428                        LogicVRegister dst,
   2429                        const LogicVRegister& src);
   2430   LogicVRegister fabs_(VectorFormat vform,
   2431                        LogicVRegister dst,
   2432                        const LogicVRegister& src);
   2433   LogicVRegister fabd(VectorFormat vform,
   2434                       LogicVRegister dst,
   2435                       const LogicVRegister& src1,
   2436                       const LogicVRegister& src2);
   2437   LogicVRegister frint(VectorFormat vform,
   2438                        LogicVRegister dst,
   2439                        const LogicVRegister& src,
   2440                        FPRounding rounding_mode,
   2441                        bool inexact_exception = false);
   2442   LogicVRegister fcvts(VectorFormat vform,
   2443                        LogicVRegister dst,
   2444                        const LogicVRegister& src,
   2445                        FPRounding rounding_mode,
   2446                        int fbits = 0);
   2447   LogicVRegister fcvtu(VectorFormat vform,
   2448                        LogicVRegister dst,
   2449                        const LogicVRegister& src,
   2450                        FPRounding rounding_mode,
   2451                        int fbits = 0);
   2452   LogicVRegister fcvtl(VectorFormat vform,
   2453                        LogicVRegister dst,
   2454                        const LogicVRegister& src);
   2455   LogicVRegister fcvtl2(VectorFormat vform,
   2456                         LogicVRegister dst,
   2457                         const LogicVRegister& src);
   2458   LogicVRegister fcvtn(VectorFormat vform,
   2459                        LogicVRegister dst,
   2460                        const LogicVRegister& src);
   2461   LogicVRegister fcvtn2(VectorFormat vform,
   2462                         LogicVRegister dst,
   2463                         const LogicVRegister& src);
   2464   LogicVRegister fcvtxn(VectorFormat vform,
   2465                         LogicVRegister dst,
   2466                         const LogicVRegister& src);
   2467   LogicVRegister fcvtxn2(VectorFormat vform,
   2468                          LogicVRegister dst,
   2469                          const LogicVRegister& src);
   2470   LogicVRegister fsqrt(VectorFormat vform,
   2471                        LogicVRegister dst,
   2472                        const LogicVRegister& src);
   2473   LogicVRegister frsqrte(VectorFormat vform,
   2474                          LogicVRegister dst,
   2475                          const LogicVRegister& src);
   2476   LogicVRegister frecpe(VectorFormat vform,
   2477                         LogicVRegister dst,
   2478                         const LogicVRegister& src,
   2479                         FPRounding rounding);
   2480   LogicVRegister ursqrte(VectorFormat vform,
   2481                          LogicVRegister dst,
   2482                          const LogicVRegister& src);
   2483   LogicVRegister urecpe(VectorFormat vform,
   2484                         LogicVRegister dst,
   2485                         const LogicVRegister& src);
   2486 
   2487   typedef float (Simulator::*FPMinMaxOp)(float a, float b);
          // FPMinMaxOp (above) is a pointer to a Simulator member function that
          // takes two floats and returns a float. fminmaxv (below) receives one as
          // its Op argument.
          // NOTE(review): the typedef is float-only; presumably fminmaxv is the
          // shared helper behind the fminv/fmaxv/fminnmv/fmaxnmv declarations --
          // confirm in the implementation file.
   2488 
   2489   LogicVRegister fminmaxv(VectorFormat vform,
   2490                           LogicVRegister dst,
   2491                           const LogicVRegister& src,
   2492                           FPMinMaxOp Op);
   2493 
   2494   LogicVRegister fminv(VectorFormat vform,
   2495                        LogicVRegister dst,
   2496                        const LogicVRegister& src);
   2497   LogicVRegister fmaxv(VectorFormat vform,
   2498                        LogicVRegister dst,
   2499                        const LogicVRegister& src);
   2500   LogicVRegister fminnmv(VectorFormat vform,
   2501                          LogicVRegister dst,
   2502                          const LogicVRegister& src);
   2503   LogicVRegister fmaxnmv(VectorFormat vform,
   2504                          LogicVRegister dst,
   2505                          const LogicVRegister& src);
   2506 
   2507   static const uint32_t CRC32_POLY  = 0x04C11DB7;
   2508   static const uint32_t CRC32C_POLY = 0x1EDC6F41;
          // The two constants above are CRC generator polynomials: 0x04C11DB7 is
          // the CRC-32 (IEEE 802.3) polynomial and 0x1EDC6F41 is the CRC-32C
          // (Castagnoli) polynomial.
          // NOTE(review): presumably one of them is passed as the `poly` argument
          // of the helpers below -- confirm against the callers.
   2509   uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly);
   2510   template <typename T>
   2511   uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
   2512   uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
   2513 
   2514   void SysOp_W(int op, int64_t val);
   2515 
   2516   template <typename T>
   2517   T FPRecipSqrtEstimate(T op);
   2518   template <typename T>
   2519   T FPRecipEstimate(T op, FPRounding rounding);
   2520   template <typename T, typename R>
   2521   R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);
   2522 
   2523   void FPCompare(double val0, double val1, FPTrapFlags trap);
   2524   double FPRoundInt(double value, FPRounding round_mode);
   2525   double FPToDouble(float value);
   2526   float FPToFloat(double value, FPRounding round_mode);
   2527   float FPToFloat(float16 value);
   2528   float16 FPToFloat16(float value, FPRounding round_mode);
   2529   float16 FPToFloat16(double value, FPRounding round_mode);
   2530   double recip_sqrt_estimate(double a);
   2531   double recip_estimate(double a);
   2532   double FPRecipSqrtEstimate(double a);
   2533   double FPRecipEstimate(double a);
   2534   double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
   2535   double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
   2536   float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
   2537   float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
   2538   int32_t FPToInt32(double value, FPRounding rmode);
   2539   int64_t FPToInt64(double value, FPRounding rmode);
   2540   uint32_t FPToUInt32(double value, FPRounding rmode);
   2541   uint64_t FPToUInt64(double value, FPRounding rmode);
   2542 
   2543   template <typename T>
   2544   T FPAdd(T op1, T op2);
   2545 
   2546   template <typename T>
   2547   T FPDiv(T op1, T op2);
   2548 
   2549   template <typename T>
   2550   T FPMax(T a, T b);
   2551 
   2552   template <typename T>
   2553   T FPMaxNM(T a, T b);
   2554 
   2555   template <typename T>
   2556   T FPMin(T a, T b);
   2557 
   2558   template <typename T>
   2559   T FPMinNM(T a, T b);
   2560 
   2561   template <typename T>
   2562   T FPMul(T op1, T op2);
   2563 
   2564   template <typename T>
   2565   T FPMulx(T op1, T op2);
   2566 
   2567   template <typename T>
   2568   T FPMulAdd(T a, T op1, T op2);
   2569 
   2570   template <typename T>
   2571   T FPSqrt(T op);
   2572 
   2573   template <typename T>
   2574   T FPSub(T op1, T op2);
   2575 
   2576   template <typename T>
   2577   T FPRecipStepFused(T op1, T op2);
   2578 
   2579   template <typename T>
   2580   T FPRSqrtStepFused(T op1, T op2);
   2581 
   // Placeholder hook that is currently a no-op. It will be needed if support
   // for cumulative exception bits or floating-point exceptions is added.
   void FPProcessException() {}
   2585 
   2586   bool FPProcessNaNs(const Instruction* instr);
   2587 
   2588   // Pseudo Printf instruction
   2589   void DoPrintf(const Instruction* instr);
   2590 
   2591   // Processor state ---------------------------------------
   2592 
   2593   // Simulated monitors for exclusive access instructions.
   2594   SimExclusiveLocalMonitor local_monitor_;
   2595   SimExclusiveGlobalMonitor global_monitor_;
   2596 
   2597   // Output stream.
   2598   FILE* stream_;
   2599   PrintDisassembler* print_disasm_;
   2600 
   2601   // Instruction statistics instrumentation.
   2602   Instrument* instrumentation_;
   2603 
   2604   // General purpose registers. Register 31 is the stack pointer.
   2605   SimRegister registers_[kNumberOfRegisters];
   2606 
   2607   // Vector registers
   2608   SimVRegister vregisters_[kNumberOfVRegisters];
   2609 
   2610   // Program Status Register.
   2611   // bits[31, 28]: Condition flags N, Z, C, and V.
   2612   //               (Negative, Zero, Carry, Overflow)
   2613   SimSystemRegister nzcv_;
   2614 
   2615   // Floating-Point Control Register
   2616   SimSystemRegister fpcr_;
   2617 
   2618   // Only a subset of FPCR features are supported by the simulator. This helper
   2619   // checks that the FPCR settings are supported.
   2620   //
   2621   // This is checked when floating-point instructions are executed, not when
   2622   // FPCR is set. This allows generated code to modify FPCR for external
   2623   // functions, or to save and restore it when entering and leaving generated
   2624   // code.
          //
          // Two fields are checked, each with VIXL_ASSERT: FZ must be 0 and RMode
          // must be FPTieEven. The helper returns nothing.
   2625   void AssertSupportedFPCR() {
   2626     VIXL_ASSERT(fpcr().FZ() == 0);             // No flush-to-zero support.
   2627     VIXL_ASSERT(fpcr().RMode() == FPTieEven);  // Ties-to-even rounding only.
   2628 
   2629     // The simulator does not support half-precision operations so fpcr().AHP()
   2630     // is irrelevant, and is not checked here.
            // NOTE(review): this class also declares FPToFloat16() and
            // FPToFloat(float16) conversions; confirm that AHP is still irrelevant.
   2631   }
   2632 
   // Computes the N (negative) flag for a result that is reg_size bits wide:
   // the value of bit (reg_size - 1) of the result.
   static int CalcNFlag(uint64_t result, unsigned reg_size) {
     const unsigned top_bit_index = reg_size - 1;
     const uint64_t top_bit = (result >> top_bit_index) & 1;
     return static_cast<int>(top_bit);
   }
   2636 
   // Computes the Z (zero) flag: 1 when the result is zero, 0 otherwise.
   static int CalcZFlag(uint64_t result) {
     return (result == 0) ? 1 : 0;
   }
   2640 
   2641   static const uint32_t kConditionFlagsMask = 0xf0000000;
   2642 
   2643   // Stack
   2644   byte* stack_;
   2645   static const int stack_protection_size_ = 256;
   2646   // 2 KB of stack plus 2 * stack_protection_size_ bytes reserved for protection.
   2647   static const int stack_size_ = 2 * 1024 + 2 * stack_protection_size_;
   2648   byte* stack_limit_;
   2649 
   2650   Decoder* decoder_;
   2651   // Indicates if the pc has been modified by the instruction and should not be
   2652   // automatically incremented.
   2653   bool pc_modified_;
   2654   const Instruction* pc_;
   2655 
   2656   static const char* xreg_names[];
   2657   static const char* wreg_names[];
   2658   static const char* sreg_names[];
   2659   static const char* dreg_names[];
   2660   static const char* vreg_names[];
   2661 
   2662   static const Instruction* kEndOfSimAddress;
   2663 
   2664  private:
   2665   template <typename T>
   2666   static T FPDefaultNaN();
   2667 
   2668   // Standard NaN processing.
   2669   template <typename T>
   2670   T FPProcessNaN(T op) {
   2671     VIXL_ASSERT(std::isnan(op));
   2672     if (IsSignallingNaN(op)) {
   2673       FPProcessException();
   2674     }
   2675     return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
   2676   }
   2677 
   // Picks the NaN operand of a two-operand operation and processes it with
   // FPProcessNaN(). Signalling NaNs take priority over quiet NaNs, and op1
   // takes priority over op2 within each kind. Returns 0.0 when neither
   // operand is a NaN.
   template <typename T>
   T FPProcessNaNs(T op1, T op2) {
     // Signalling NaNs are considered first, in operand order.
     if (IsSignallingNaN(op1)) return FPProcessNaN(op1);
     if (IsSignallingNaN(op2)) return FPProcessNaN(op2);

     // Any NaN that remains must be quiet.
     if (std::isnan(op1)) {
       VIXL_ASSERT(IsQuietNaN(op1));
       return FPProcessNaN(op1);
     }
     if (std::isnan(op2)) {
       VIXL_ASSERT(IsQuietNaN(op2));
       return FPProcessNaN(op2);
     }

     // No NaN operands.
     return 0.0;
   }
   2694 
   // Three-operand variant of FPProcessNaNs(). Signalling NaNs take priority
   // over quiet NaNs, and within each kind the operands are considered in the
   // order op1, op2, op3. Returns 0.0 when no operand is a NaN.
   template <typename T>
   T FPProcessNaNs3(T op1, T op2, T op3) {
     // Signalling NaNs are considered first, in operand order.
     if (IsSignallingNaN(op1)) return FPProcessNaN(op1);
     if (IsSignallingNaN(op2)) return FPProcessNaN(op2);
     if (IsSignallingNaN(op3)) return FPProcessNaN(op3);

     // Any NaN that remains must be quiet.
     if (std::isnan(op1)) {
       VIXL_ASSERT(IsQuietNaN(op1));
       return FPProcessNaN(op1);
     }
     if (std::isnan(op2)) {
       VIXL_ASSERT(IsQuietNaN(op2));
       return FPProcessNaN(op2);
     }
     if (std::isnan(op3)) {
       VIXL_ASSERT(IsQuietNaN(op3));
       return FPProcessNaN(op3);
     }

     // No NaN operands.
     return 0.0;
   }
   2716 
   2717   bool coloured_trace_;
   2718 
   2719   // A set of TraceParameters flags.
   2720   int trace_parameters_;
   2721 
   2722   // Indicates whether the instruction instrumentation is active.
   2723   bool instruction_stats_;
   2724 
   2725   // Indicates whether the exclusive-access warning has been printed.
   2726   bool print_exclusive_access_warning_;
   2727   void PrintExclusiveAccessWarning();
   2728 };
   2729 }  // namespace vixl
   2730 
   2731 #endif  // VIXL_A64_SIMULATOR_A64_H_
   2732