Home | History | Annotate | Download | only in a64
      1 // Copyright 2015, ARM Limited
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #ifdef VIXL_INCLUDE_SIMULATOR
     28 
     29 #include <string.h>
     30 #include <cmath>
     31 #include "vixl/a64/simulator-a64.h"
     32 
     33 namespace vixl {
     34 
// Sentinel program-counter value that terminates simulation: Run() keeps
// executing instructions until pc_ equals this address, and ResetState()
// installs it as the initial link register value.
const Instruction* Simulator::kEndOfSimAddress = NULL;
     36 
     37 void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
     38   int width = msb - lsb + 1;
     39   VIXL_ASSERT(is_uintn(width, bits) || is_intn(width, bits));
     40 
     41   bits <<= lsb;
     42   uint32_t mask = ((1 << width) - 1) << lsb;
     43   VIXL_ASSERT((mask & write_ignore_mask_) == 0);
     44 
     45   value_ = (value_ & ~mask) | (bits & mask);
     46 }
     47 
     48 
     49 SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
     50   switch (id) {
     51     case NZCV:
     52       return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
     53     case FPCR:
     54       return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
     55     default:
     56       VIXL_UNREACHABLE();
     57       return SimSystemRegister();
     58   }
     59 }
     60 
     61 
     62 Simulator::Simulator(Decoder* decoder, FILE* stream) {
     63   // Ensure that shift operations act as the simulator expects.
     64   VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
     65   VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff);
     66 
     67   instruction_stats_ = false;
     68 
     69   // Set up the decoder.
     70   decoder_ = decoder;
     71   decoder_->AppendVisitor(this);
     72 
     73   stream_ = stream;
     74   print_disasm_ = new PrintDisassembler(stream_);
     75   set_coloured_trace(false);
     76   trace_parameters_ = LOG_NONE;
     77 
     78   ResetState();
     79 
     80   // Allocate and set up the simulator stack.
     81   stack_ = new byte[stack_size_];
     82   stack_limit_ = stack_ + stack_protection_size_;
     83   // Configure the starting stack pointer.
     84   //  - Find the top of the stack.
     85   byte * tos = stack_ + stack_size_;
     86   //  - There's a protection region at both ends of the stack.
     87   tos -= stack_protection_size_;
     88   //  - The stack pointer must be 16-byte aligned.
     89   tos = AlignDown(tos, 16);
     90   set_sp(tos);
     91 
     92   // Set the sample period to 10, as the VIXL examples and tests are short.
     93   instrumentation_ = new Instrument("vixl_stats.csv", 10);
     94 
     95   // Print a warning about exclusive-access instructions, but only the first
     96   // time they are encountered. This warning can be silenced using
     97   // SilenceExclusiveAccessWarning().
     98   print_exclusive_access_warning_ = true;
     99 }
    100 
    101 
    102 void Simulator::ResetState() {
    103   // Reset the system registers.
    104   nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
    105   fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
    106 
    107   // Reset registers to 0.
    108   pc_ = NULL;
    109   pc_modified_ = false;
    110   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    111     set_xreg(i, 0xbadbeef);
    112   }
    113   // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
    114   uint64_t nan_bits = UINT64_C(0x7ff0dead7f8beef1);
    115   VIXL_ASSERT(IsSignallingNaN(rawbits_to_double(nan_bits & kDRegMask)));
    116   VIXL_ASSERT(IsSignallingNaN(rawbits_to_float(nan_bits & kSRegMask)));
    117   for (unsigned i = 0; i < kNumberOfFPRegisters; i++) {
    118     set_dreg_bits(i, nan_bits);
    119   }
    120   // Returning to address 0 exits the Simulator.
    121   set_lr(kEndOfSimAddress);
    122 }
    123 
    124 
    125 Simulator::~Simulator() {
    126   delete[] stack_;
    127   // The decoder may outlive the simulator.
    128   decoder_->RemoveVisitor(print_disasm_);
    129   delete print_disasm_;
    130 
    131   decoder_->RemoveVisitor(instrumentation_);
    132   delete instrumentation_;
    133 }
    134 
    135 
    136 void Simulator::Run() {
    137   pc_modified_ = false;
    138   while (pc_ != kEndOfSimAddress) {
    139     ExecuteInstruction();
    140     LogAllWrittenRegisters();
    141   }
    142 }
    143 
    144 
    145 void Simulator::RunFrom(const Instruction* first) {
    146   set_pc(first);
    147   Run();
    148 }
    149 
    150 
    151 const char* Simulator::xreg_names[] = {
    152 "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
    153 "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    154 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    155 "x24", "x25", "x26", "x27", "x28", "x29", "lr",  "xzr", "sp"};
    156 
    157 const char* Simulator::wreg_names[] = {
    158 "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
    159 "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
    160 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
    161 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr", "wsp"};
    162 
    163 const char* Simulator::sreg_names[] = {
    164 "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
    165 "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
    166 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
    167 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"};
    168 
    169 const char* Simulator::dreg_names[] = {
    170 "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
    171 "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
    172 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
    173 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
    174 
    175 const char* Simulator::vreg_names[] = {
    176 "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    177 "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    178 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    179 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"};
    180 
    181 
    182 
    183 const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
    184   VIXL_ASSERT(code < kNumberOfRegisters);
    185   // If the code represents the stack pointer, index the name after zr.
    186   if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
    187     code = kZeroRegCode + 1;
    188   }
    189   return wreg_names[code];
    190 }
    191 
    192 
    193 const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
    194   VIXL_ASSERT(code < kNumberOfRegisters);
    195   // If the code represents the stack pointer, index the name after zr.
    196   if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
    197     code = kZeroRegCode + 1;
    198   }
    199   return xreg_names[code];
    200 }
    201 
    202 
    203 const char* Simulator::SRegNameForCode(unsigned code) {
    204   VIXL_ASSERT(code < kNumberOfFPRegisters);
    205   return sreg_names[code];
    206 }
    207 
    208 
    209 const char* Simulator::DRegNameForCode(unsigned code) {
    210   VIXL_ASSERT(code < kNumberOfFPRegisters);
    211   return dreg_names[code];
    212 }
    213 
    214 
    215 const char* Simulator::VRegNameForCode(unsigned code) {
    216   VIXL_ASSERT(code < kNumberOfVRegisters);
    217   return vreg_names[code];
    218 }
    219 
    220 
    221 #define COLOUR(colour_code)       "\033[0;" colour_code "m"
    222 #define COLOUR_BOLD(colour_code)  "\033[1;" colour_code "m"
    223 #define NORMAL  ""
    224 #define GREY    "30"
    225 #define RED     "31"
    226 #define GREEN   "32"
    227 #define YELLOW  "33"
    228 #define BLUE    "34"
    229 #define MAGENTA "35"
    230 #define CYAN    "36"
    231 #define WHITE   "37"
    232 void Simulator::set_coloured_trace(bool value) {
    233   coloured_trace_ = value;
    234 
    235   clr_normal          = value ? COLOUR(NORMAL)        : "";
    236   clr_flag_name       = value ? COLOUR_BOLD(WHITE)    : "";
    237   clr_flag_value      = value ? COLOUR(NORMAL)        : "";
    238   clr_reg_name        = value ? COLOUR_BOLD(CYAN)     : "";
    239   clr_reg_value       = value ? COLOUR(CYAN)          : "";
    240   clr_vreg_name       = value ? COLOUR_BOLD(MAGENTA)  : "";
    241   clr_vreg_value      = value ? COLOUR(MAGENTA)       : "";
    242   clr_memory_address  = value ? COLOUR_BOLD(BLUE)     : "";
    243   clr_warning         = value ? COLOUR_BOLD(YELLOW)   : "";
    244   clr_warning_message = value ? COLOUR(YELLOW)        : "";
    245   clr_printf          = value ? COLOUR(GREEN)         : "";
    246 }
    247 
    248 
    249 void Simulator::set_trace_parameters(int parameters) {
    250   bool disasm_before = trace_parameters_ & LOG_DISASM;
    251   trace_parameters_ = parameters;
    252   bool disasm_after = trace_parameters_ & LOG_DISASM;
    253 
    254   if (disasm_before != disasm_after) {
    255     if (disasm_after) {
    256       decoder_->InsertVisitorBefore(print_disasm_, this);
    257     } else {
    258       decoder_->RemoveVisitor(print_disasm_);
    259     }
    260   }
    261 }
    262 
    263 
    264 void Simulator::set_instruction_stats(bool value) {
    265   if (value != instruction_stats_) {
    266     if (value) {
    267       decoder_->AppendVisitor(instrumentation_);
    268     } else {
    269       decoder_->RemoveVisitor(instrumentation_);
    270     }
    271     instruction_stats_ = value;
    272   }
    273 }
    274 
    275 // Helpers ---------------------------------------------------------------------
    276 uint64_t Simulator::AddWithCarry(unsigned reg_size,
    277                                  bool set_flags,
    278                                  uint64_t left,
    279                                  uint64_t right,
    280                                  int carry_in) {
    281   VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
    282   VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
    283 
    284   uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
    285   uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
    286   uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;
    287 
    288   left &= reg_mask;
    289   right &= reg_mask;
    290   uint64_t result = (left + right + carry_in) & reg_mask;
    291 
    292   if (set_flags) {
    293     nzcv().SetN(CalcNFlag(result, reg_size));
    294     nzcv().SetZ(CalcZFlag(result));
    295 
    296     // Compute the C flag by comparing the result to the max unsigned integer.
    297     uint64_t max_uint_2op = max_uint - carry_in;
    298     bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
    299     nzcv().SetC(C ? 1 : 0);
    300 
    301     // Overflow iff the sign bit is the same for the two inputs and different
    302     // for the result.
    303     uint64_t left_sign = left & sign_mask;
    304     uint64_t right_sign = right & sign_mask;
    305     uint64_t result_sign = result & sign_mask;
    306     bool V = (left_sign == right_sign) && (left_sign != result_sign);
    307     nzcv().SetV(V ? 1 : 0);
    308 
    309     LogSystemRegister(NZCV);
    310   }
    311   return result;
    312 }
    313 
    314 
    315 int64_t Simulator::ShiftOperand(unsigned reg_size,
    316                                 int64_t value,
    317                                 Shift shift_type,
    318                                 unsigned amount) {
    319   if (amount == 0) {
    320     return value;
    321   }
    322   int64_t mask = reg_size == kXRegSize ? kXRegMask : kWRegMask;
    323   switch (shift_type) {
    324     case LSL:
    325       return (value << amount) & mask;
    326     case LSR:
    327       return static_cast<uint64_t>(value) >> amount;
    328     case ASR: {
    329       // Shift used to restore the sign.
    330       unsigned s_shift = kXRegSize - reg_size;
    331       // Value with its sign restored.
    332       int64_t s_value = (value << s_shift) >> s_shift;
    333       return (s_value >> amount) & mask;
    334     }
    335     case ROR: {
    336       if (reg_size == kWRegSize) {
    337         value &= kWRegMask;
    338       }
    339       return (static_cast<uint64_t>(value) >> amount) |
    340              ((value & ((INT64_C(1) << amount) - 1)) <<
    341               (reg_size - amount));
    342     }
    343     default:
    344       VIXL_UNIMPLEMENTED();
    345       return 0;
    346   }
    347 }
    348 
    349 
    350 int64_t Simulator::ExtendValue(unsigned reg_size,
    351                                int64_t value,
    352                                Extend extend_type,
    353                                unsigned left_shift) {
    354   switch (extend_type) {
    355     case UXTB:
    356       value &= kByteMask;
    357       break;
    358     case UXTH:
    359       value &= kHalfWordMask;
    360       break;
    361     case UXTW:
    362       value &= kWordMask;
    363       break;
    364     case SXTB:
    365       value = (value << 56) >> 56;
    366       break;
    367     case SXTH:
    368       value = (value << 48) >> 48;
    369       break;
    370     case SXTW:
    371       value = (value << 32) >> 32;
    372       break;
    373     case UXTX:
    374     case SXTX:
    375       break;
    376     default:
    377       VIXL_UNREACHABLE();
    378   }
    379   int64_t mask = (reg_size == kXRegSize) ? kXRegMask : kWRegMask;
    380   return (value << left_shift) & mask;
    381 }
    382 
    383 
    384 void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
    385   AssertSupportedFPCR();
    386 
    387   // TODO: This assumes that the C++ implementation handles comparisons in the
    388   // way that we expect (as per AssertSupportedFPCR()).
    389   bool process_exception = false;
    390   if ((std::isnan(val0) != 0) || (std::isnan(val1) != 0)) {
    391     nzcv().SetRawValue(FPUnorderedFlag);
    392     if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
    393         (trap == EnableTrap)) {
    394       process_exception = true;
    395     }
    396   } else if (val0 < val1) {
    397     nzcv().SetRawValue(FPLessThanFlag);
    398   } else if (val0 > val1) {
    399     nzcv().SetRawValue(FPGreaterThanFlag);
    400   } else if (val0 == val1) {
    401     nzcv().SetRawValue(FPEqualFlag);
    402   } else {
    403     VIXL_UNREACHABLE();
    404   }
    405   LogSystemRegister(NZCV);
    406   if (process_exception) FPProcessException();
    407 }
    408 
    409 
    410 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
    411     unsigned reg_size, unsigned lane_size) {
    412   VIXL_ASSERT(reg_size >= lane_size);
    413 
    414   uint32_t format = 0;
    415   if (reg_size != lane_size) {
    416     switch (reg_size) {
    417       default: VIXL_UNREACHABLE(); break;
    418       case kQRegSizeInBytes: format = kPrintRegAsQVector; break;
    419       case kDRegSizeInBytes: format = kPrintRegAsDVector; break;
    420     }
    421   }
    422 
    423   switch (lane_size) {
    424     default: VIXL_UNREACHABLE(); break;
    425     case kQRegSizeInBytes: format |= kPrintReg1Q; break;
    426     case kDRegSizeInBytes: format |= kPrintReg1D; break;
    427     case kSRegSizeInBytes: format |= kPrintReg1S; break;
    428     case kHRegSizeInBytes: format |= kPrintReg1H; break;
    429     case kBRegSizeInBytes: format |= kPrintReg1B; break;
    430   }
    431   // These sizes would be duplicate case labels.
    432   VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
    433   VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
    434   VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
    435   VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
    436 
    437   return static_cast<PrintRegisterFormat>(format);
    438 }
    439 
    440 
    441 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
    442     VectorFormat vform) {
    443   switch (vform) {
    444     default: VIXL_UNREACHABLE(); return kPrintReg16B;
    445     case kFormat16B: return kPrintReg16B;
    446     case kFormat8B: return kPrintReg8B;
    447     case kFormat8H: return kPrintReg8H;
    448     case kFormat4H: return kPrintReg4H;
    449     case kFormat4S: return kPrintReg4S;
    450     case kFormat2S: return kPrintReg2S;
    451     case kFormat2D: return kPrintReg2D;
    452     case kFormat1D: return kPrintReg1D;
    453   }
    454 }
    455 
    456 
    457 void Simulator::PrintWrittenRegisters() {
    458   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    459     if (registers_[i].WrittenSinceLastLog()) PrintRegister(i);
    460   }
    461 }
    462 
    463 
    464 void Simulator::PrintWrittenVRegisters() {
    465   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    466     // At this point there is no type information, so print as a raw 1Q.
    467     if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q);
    468   }
    469 }
    470 
    471 
    472 void Simulator::PrintSystemRegisters() {
    473   PrintSystemRegister(NZCV);
    474   PrintSystemRegister(FPCR);
    475 }
    476 
    477 
    478 void Simulator::PrintRegisters() {
    479   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    480     PrintRegister(i);
    481   }
    482 }
    483 
    484 
    485 void Simulator::PrintVRegisters() {
    486   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    487     // At this point there is no type information, so print as a raw 1Q.
    488     PrintVRegister(i, kPrintReg1Q);
    489   }
    490 }
    491 
    492 
    493 // Print a register's name and raw value.
    494 //
    495 // Only the least-significant `size_in_bytes` bytes of the register are printed,
    496 // but the value is aligned as if the whole register had been printed.
    497 //
    498 // For typical register updates, size_in_bytes should be set to kXRegSizeInBytes
    499 // -- the default -- so that the whole register is printed. Other values of
    500 // size_in_bytes are intended for use when the register hasn't actually been
    501 // updated (such as in PrintWrite).
    502 //
    503 // No newline is printed. This allows the caller to print more details (such as
    504 // a memory access annotation).
    505 void Simulator::PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
    506                                        int size_in_bytes) {
    507   // The template for all supported sizes.
    508   //   "# x{code}: 0xffeeddccbbaa9988"
    509   //   "# w{code}:         0xbbaa9988"
    510   //   "# w{code}<15:0>:       0x9988"
    511   //   "# w{code}<7:0>:          0x88"
    512   unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2;
    513 
    514   const char * name = "";
    515   const char * suffix = "";
    516   switch (size_in_bytes) {
    517     case kXRegSizeInBytes: name = XRegNameForCode(code, r31mode); break;
    518     case kWRegSizeInBytes: name = WRegNameForCode(code, r31mode); break;
    519     case 2:
    520       name = WRegNameForCode(code, r31mode);
    521       suffix = "<15:0>";
    522       padding_chars -= strlen(suffix);
    523       break;
    524     case 1:
    525       name = WRegNameForCode(code, r31mode);
    526       suffix = "<7:0>";
    527       padding_chars -= strlen(suffix);
    528       break;
    529     default:
    530       VIXL_UNREACHABLE();
    531   }
    532   fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix);
    533 
    534   // Print leading padding spaces.
    535   VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2));
    536   for (unsigned i = 0; i < padding_chars; i++) {
    537     putc(' ', stream_);
    538   }
    539 
    540   // Print the specified bits in hexadecimal format.
    541   uint64_t bits = reg<uint64_t>(code, r31mode);
    542   bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8);
    543   VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes);
    544 
    545   int chars = size_in_bytes * 2;
    546   fprintf(stream_, "%s0x%0*" PRIx64 "%s",
    547           clr_reg_value, chars, bits, clr_normal);
    548 }
    549 
    550 
    551 void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) {
    552   registers_[code].NotifyRegisterLogged();
    553 
    554   // Don't print writes into xzr.
    555   if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) {
    556     return;
    557   }
    558 
    559   // The template for all x and w registers:
    560   //   "# x{code}: 0x{value}"
    561   //   "# w{code}: 0x{value}"
    562 
    563   PrintRegisterRawHelper(code, r31mode);
    564   fprintf(stream_, "\n");
    565 }
    566 
    567 
    568 // Print a register's name and raw value.
    569 //
    570 // The `bytes` and `lsb` arguments can be used to limit the bytes that are
    571 // printed. These arguments are intended for use in cases where register hasn't
    572 // actually been updated (such as in PrintVWrite).
    573 //
    574 // No newline is printed. This allows the caller to print more details (such as
    575 // a floating-point interpretation or a memory access annotation).
    576 void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) {
    577   // The template for vector types:
    578   //   "# v{code}: 0xffeeddccbbaa99887766554433221100".
    579   // An example with bytes=4 and lsb=8:
    580   //   "# v{code}:         0xbbaa9988                ".
    581   fprintf(stream_, "# %s%5s: %s",
    582           clr_vreg_name, VRegNameForCode(code), clr_vreg_value);
    583 
    584   int msb = lsb + bytes - 1;
    585   int byte = kQRegSizeInBytes - 1;
    586 
    587   // Print leading padding spaces. (Two spaces per byte.)
    588   while (byte > msb) {
    589     fprintf(stream_, "  ");
    590     byte--;
    591   }
    592 
    593   // Print the specified part of the value, byte by byte.
    594   qreg_t rawbits = qreg(code);
    595   fprintf(stream_, "0x");
    596   while (byte >= lsb) {
    597     fprintf(stream_, "%02x", rawbits.val[byte]);
    598     byte--;
    599   }
    600 
    601   // Print trailing padding spaces.
    602   while (byte >= 0) {
    603     fprintf(stream_, "  ");
    604     byte--;
    605   }
    606   fprintf(stream_, "%s", clr_normal);
    607 }
    608 
    609 
    610 // Print each of the specified lanes of a register as a float or double value.
    611 //
    612 // The `lane_count` and `lslane` arguments can be used to limit the lanes that
    613 // are printed. These arguments are intended for use in cases where register
    614 // hasn't actually been updated (such as in PrintVWrite).
    615 //
    616 // No newline is printed. This allows the caller to print more details (such as
    617 // a memory access annotation).
    618 void Simulator::PrintVRegisterFPHelper(unsigned code,
    619                                        unsigned lane_size_in_bytes,
    620                                        int lane_count,
    621                                        int rightmost_lane) {
    622   VIXL_ASSERT((lane_size_in_bytes == kSRegSizeInBytes) ||
    623               (lane_size_in_bytes == kDRegSizeInBytes));
    624 
    625   unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes);
    626   VIXL_ASSERT(msb <= kQRegSizeInBytes);
    627 
    628   // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register
    629   // name is used:
    630   //   " (s{code}: {value})"
    631   //   " (d{code}: {value})"
    632   // For vector types, "..." is used to represent one or more omitted lanes.
    633   //   " (..., {value}, {value}, ...)"
    634   if ((lane_count == 1) && (rightmost_lane == 0)) {
    635     const char * name =
    636         (lane_size_in_bytes == kSRegSizeInBytes) ? SRegNameForCode(code)
    637                                                  : DRegNameForCode(code);
    638     fprintf(stream_, " (%s%s: ", clr_vreg_name, name);
    639   } else {
    640     if (msb < (kQRegSizeInBytes - 1)) {
    641       fprintf(stream_, " (..., ");
    642     } else {
    643       fprintf(stream_, " (");
    644     }
    645   }
    646 
    647   // Print the list of values.
    648   const char * separator = "";
    649   int leftmost_lane = rightmost_lane + lane_count - 1;
    650   for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) {
    651     double value =
    652         (lane_size_in_bytes == kSRegSizeInBytes) ? vreg(code).Get<float>(lane)
    653                                                  : vreg(code).Get<double>(lane);
    654     fprintf(stream_, "%s%s%#g%s", separator, clr_vreg_value, value, clr_normal);
    655     separator = ", ";
    656   }
    657 
    658   if (rightmost_lane > 0) {
    659     fprintf(stream_, ", ...");
    660   }
    661   fprintf(stream_, ")");
    662 }
    663 
    664 
    665 void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) {
    666   vregisters_[code].NotifyRegisterLogged();
    667 
    668   int lane_size_log2 = format & kPrintRegLaneSizeMask;
    669 
    670   int reg_size_log2;
    671   if (format & kPrintRegAsQVector) {
    672     reg_size_log2 = kQRegSizeInBytesLog2;
    673   } else if (format & kPrintRegAsDVector) {
    674     reg_size_log2 = kDRegSizeInBytesLog2;
    675   } else {
    676     // Scalar types.
    677     reg_size_log2 = lane_size_log2;
    678   }
    679 
    680   int lane_count = 1 << (reg_size_log2 - lane_size_log2);
    681   int lane_size = 1 << lane_size_log2;
    682 
    683   // The template for vector types:
    684   //   "# v{code}: 0x{rawbits} (..., {value}, ...)".
    685   // The template for scalar types:
    686   //   "# v{code}: 0x{rawbits} ({reg}:{value})".
    687   // The values in parentheses after the bit representations are floating-point
    688   // interpretations. They are displayed only if the kPrintVRegAsFP bit is set.
    689 
    690   PrintVRegisterRawHelper(code);
    691   if (format & kPrintRegAsFP) {
    692     PrintVRegisterFPHelper(code, lane_size, lane_count);
    693   }
    694 
    695   fprintf(stream_, "\n");
    696 }
    697 
    698 
    699 void Simulator::PrintSystemRegister(SystemRegister id) {
    700   switch (id) {
    701     case NZCV:
    702       fprintf(stream_, "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
    703               clr_flag_name, clr_flag_value,
    704               nzcv().N(), nzcv().Z(), nzcv().C(), nzcv().V(),
    705               clr_normal);
    706       break;
    707     case FPCR: {
    708       static const char * rmode[] = {
    709         "0b00 (Round to Nearest)",
    710         "0b01 (Round towards Plus Infinity)",
    711         "0b10 (Round towards Minus Infinity)",
    712         "0b11 (Round towards Zero)"
    713       };
    714       VIXL_ASSERT(fpcr().RMode() < (sizeof(rmode) / sizeof(rmode[0])));
    715       fprintf(stream_,
    716               "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
    717               clr_flag_name, clr_flag_value,
    718               fpcr().AHP(), fpcr().DN(), fpcr().FZ(), rmode[fpcr().RMode()],
    719               clr_normal);
    720       break;
    721     }
    722     default:
    723       VIXL_UNREACHABLE();
    724   }
    725 }
    726 
    727 
    728 void Simulator::PrintRead(uintptr_t address,
    729                           unsigned reg_code,
    730                           PrintRegisterFormat format) {
    731   registers_[reg_code].NotifyRegisterLogged();
    732 
    733   USE(format);
    734 
    735   // The template is "# {reg}: 0x{value} <- {address}".
    736   PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister);
    737   fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
    738           clr_memory_address, address, clr_normal);
    739 }
    740 
    741 
    742 void Simulator::PrintVRead(uintptr_t address,
    743                            unsigned reg_code,
    744                            PrintRegisterFormat format,
    745                            unsigned lane) {
    746   vregisters_[reg_code].NotifyRegisterLogged();
    747 
    748   // The template is "# v{code}: 0x{rawbits} <- address".
    749   PrintVRegisterRawHelper(reg_code);
    750   if (format & kPrintRegAsFP) {
    751     PrintVRegisterFPHelper(reg_code, GetPrintRegLaneSizeInBytes(format),
    752                            GetPrintRegLaneCount(format), lane);
    753   }
    754   fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
    755           clr_memory_address, address, clr_normal);
    756 }
    757 
    758 
    759 void Simulator::PrintWrite(uintptr_t address,
    760                            unsigned reg_code,
    761                            PrintRegisterFormat format) {
    762   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
    763 
    764   // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy
    765   // and readable, the value is aligned with the values in the register trace.
    766   PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister,
    767                          GetPrintRegSizeInBytes(format));
    768   fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
    769           clr_memory_address, address, clr_normal);
    770 }
    771 
    772 
    773 void Simulator::PrintVWrite(uintptr_t address,
    774                             unsigned reg_code,
    775                             PrintRegisterFormat format,
    776                             unsigned lane) {
    777   // The templates:
    778   //   "# v{code}: 0x{rawbits} -> {address}"
    779   //   "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}".
    780   //   "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}"
    781   // Because this trace doesn't represent a change to the source register's
    782   // value, only the relevant part of the value is printed. To keep the trace
    783   // tidy and readable, the raw value is aligned with the other values in the
    784   // register trace.
    785   int lane_count = GetPrintRegLaneCount(format);
    786   int lane_size = GetPrintRegLaneSizeInBytes(format);
    787   int reg_size = GetPrintRegSizeInBytes(format);
    788   PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane);
    789   if (format & kPrintRegAsFP) {
    790     PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane);
    791   }
    792   fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
    793           clr_memory_address, address, clr_normal);
    794 }
    795 
    796 
    797 // Visitors---------------------------------------------------------------------
    798 
    799 void Simulator::VisitUnimplemented(const Instruction* instr) {
    800   printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
    801          reinterpret_cast<const void*>(instr), instr->InstructionBits());
    802   VIXL_UNIMPLEMENTED();
    803 }
    804 
    805 
    806 void Simulator::VisitUnallocated(const Instruction* instr) {
    807   printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
    808          reinterpret_cast<const void*>(instr), instr->InstructionBits());
    809   VIXL_UNIMPLEMENTED();
    810 }
    811 
    812 
    813 void Simulator::VisitPCRelAddressing(const Instruction* instr) {
    814   VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
    815               (instr->Mask(PCRelAddressingMask) == ADRP));
    816 
    817   set_reg(instr->Rd(), instr->ImmPCOffsetTarget());
    818 }
    819 
    820 
    821 void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
    822   switch (instr->Mask(UnconditionalBranchMask)) {
    823     case BL:
    824       set_lr(instr->NextInstruction());
    825       VIXL_FALLTHROUGH();
    826     case B:
    827       set_pc(instr->ImmPCOffsetTarget());
    828       break;
    829     default: VIXL_UNREACHABLE();
    830   }
    831 }
    832 
    833 
    834 void Simulator::VisitConditionalBranch(const Instruction* instr) {
    835   VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
    836   if (ConditionPassed(instr->ConditionBranch())) {
    837     set_pc(instr->ImmPCOffsetTarget());
    838   }
    839 }
    840 
    841 
    842 void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
    843   const Instruction* target = Instruction::Cast(xreg(instr->Rn()));
    844 
    845   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
    846     case BLR:
    847       set_lr(instr->NextInstruction());
    848       VIXL_FALLTHROUGH();
    849     case BR:
    850     case RET: set_pc(target); break;
    851     default: VIXL_UNREACHABLE();
    852   }
    853 }
    854 
    855 
    856 void Simulator::VisitTestBranch(const Instruction* instr) {
    857   unsigned bit_pos = (instr->ImmTestBranchBit5() << 5) |
    858                      instr->ImmTestBranchBit40();
    859   bool bit_zero = ((xreg(instr->Rt()) >> bit_pos) & 1) == 0;
    860   bool take_branch = false;
    861   switch (instr->Mask(TestBranchMask)) {
    862     case TBZ: take_branch = bit_zero; break;
    863     case TBNZ: take_branch = !bit_zero; break;
    864     default: VIXL_UNIMPLEMENTED();
    865   }
    866   if (take_branch) {
    867     set_pc(instr->ImmPCOffsetTarget());
    868   }
    869 }
    870 
    871 
    872 void Simulator::VisitCompareBranch(const Instruction* instr) {
    873   unsigned rt = instr->Rt();
    874   bool take_branch = false;
    875   switch (instr->Mask(CompareBranchMask)) {
    876     case CBZ_w: take_branch = (wreg(rt) == 0); break;
    877     case CBZ_x: take_branch = (xreg(rt) == 0); break;
    878     case CBNZ_w: take_branch = (wreg(rt) != 0); break;
    879     case CBNZ_x: take_branch = (xreg(rt) != 0); break;
    880     default: VIXL_UNIMPLEMENTED();
    881   }
    882   if (take_branch) {
    883     set_pc(instr->ImmPCOffsetTarget());
    884   }
    885 }
    886 
    887 
    888 void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
    889   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    890   bool set_flags = instr->FlagsUpdate();
    891   int64_t new_val = 0;
    892   Instr operation = instr->Mask(AddSubOpMask);
    893 
    894   switch (operation) {
    895     case ADD:
    896     case ADDS: {
    897       new_val = AddWithCarry(reg_size,
    898                              set_flags,
    899                              reg(reg_size, instr->Rn(), instr->RnMode()),
    900                              op2);
    901       break;
    902     }
    903     case SUB:
    904     case SUBS: {
    905       new_val = AddWithCarry(reg_size,
    906                              set_flags,
    907                              reg(reg_size, instr->Rn(), instr->RnMode()),
    908                              ~op2,
    909                              1);
    910       break;
    911     }
    912     default: VIXL_UNREACHABLE();
    913   }
    914 
    915   set_reg(reg_size, instr->Rd(), new_val, LogRegWrites, instr->RdMode());
    916 }
    917 
    918 
    919 void Simulator::VisitAddSubShifted(const Instruction* instr) {
    920   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    921   int64_t op2 = ShiftOperand(reg_size,
    922                              reg(reg_size, instr->Rm()),
    923                              static_cast<Shift>(instr->ShiftDP()),
    924                              instr->ImmDPShift());
    925   AddSubHelper(instr, op2);
    926 }
    927 
    928 
    929 void Simulator::VisitAddSubImmediate(const Instruction* instr) {
    930   int64_t op2 = instr->ImmAddSub() << ((instr->ShiftAddSub() == 1) ? 12 : 0);
    931   AddSubHelper(instr, op2);
    932 }
    933 
    934 
    935 void Simulator::VisitAddSubExtended(const Instruction* instr) {
    936   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    937   int64_t op2 = ExtendValue(reg_size,
    938                             reg(reg_size, instr->Rm()),
    939                             static_cast<Extend>(instr->ExtendMode()),
    940                             instr->ImmExtendShift());
    941   AddSubHelper(instr, op2);
    942 }
    943 
    944 
    945 void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
    946   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    947   int64_t op2 = reg(reg_size, instr->Rm());
    948   int64_t new_val;
    949 
    950   if ((instr->Mask(AddSubOpMask) == SUB) || instr->Mask(AddSubOpMask) == SUBS) {
    951     op2 = ~op2;
    952   }
    953 
    954   new_val = AddWithCarry(reg_size,
    955                          instr->FlagsUpdate(),
    956                          reg(reg_size, instr->Rn()),
    957                          op2,
    958                          C());
    959 
    960   set_reg(reg_size, instr->Rd(), new_val);
    961 }
    962 
    963 
    964 void Simulator::VisitLogicalShifted(const Instruction* instr) {
    965   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    966   Shift shift_type = static_cast<Shift>(instr->ShiftDP());
    967   unsigned shift_amount = instr->ImmDPShift();
    968   int64_t op2 = ShiftOperand(reg_size, reg(reg_size, instr->Rm()), shift_type,
    969                              shift_amount);
    970   if (instr->Mask(NOT) == NOT) {
    971     op2 = ~op2;
    972   }
    973   LogicalHelper(instr, op2);
    974 }
    975 
    976 
    977 void Simulator::VisitLogicalImmediate(const Instruction* instr) {
    978   LogicalHelper(instr, instr->ImmLogical());
    979 }
    980 
    981 
    982 void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
    983   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    984   int64_t op1 = reg(reg_size, instr->Rn());
    985   int64_t result = 0;
    986   bool update_flags = false;
    987 
    988   // Switch on the logical operation, stripping out the NOT bit, as it has a
    989   // different meaning for logical immediate instructions.
    990   switch (instr->Mask(LogicalOpMask & ~NOT)) {
    991     case ANDS: update_flags = true; VIXL_FALLTHROUGH();
    992     case AND: result = op1 & op2; break;
    993     case ORR: result = op1 | op2; break;
    994     case EOR: result = op1 ^ op2; break;
    995     default:
    996       VIXL_UNIMPLEMENTED();
    997   }
    998 
    999   if (update_flags) {
   1000     nzcv().SetN(CalcNFlag(result, reg_size));
   1001     nzcv().SetZ(CalcZFlag(result));
   1002     nzcv().SetC(0);
   1003     nzcv().SetV(0);
   1004     LogSystemRegister(NZCV);
   1005   }
   1006 
   1007   set_reg(reg_size, instr->Rd(), result, LogRegWrites, instr->RdMode());
   1008 }
   1009 
   1010 
   1011 void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
   1012   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   1013   ConditionalCompareHelper(instr, reg(reg_size, instr->Rm()));
   1014 }
   1015 
   1016 
   1017 void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
   1018   ConditionalCompareHelper(instr, instr->ImmCondCmp());
   1019 }
   1020 
   1021 
   1022 void Simulator::ConditionalCompareHelper(const Instruction* instr,
   1023                                          int64_t op2) {
   1024   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   1025   int64_t op1 = reg(reg_size, instr->Rn());
   1026 
   1027   if (ConditionPassed(instr->Condition())) {
   1028     // If the condition passes, set the status flags to the result of comparing
   1029     // the operands.
   1030     if (instr->Mask(ConditionalCompareMask) == CCMP) {
   1031       AddWithCarry(reg_size, true, op1, ~op2, 1);
   1032     } else {
   1033       VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
   1034       AddWithCarry(reg_size, true, op1, op2, 0);
   1035     }
   1036   } else {
   1037     // If the condition fails, set the status flags to the nzcv immediate.
   1038     nzcv().SetFlags(instr->Nzcv());
   1039     LogSystemRegister(NZCV);
   1040   }
   1041 }
   1042 
   1043 
   1044 void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   1045   int offset = instr->ImmLSUnsigned() << instr->SizeLS();
   1046   LoadStoreHelper(instr, offset, Offset);
   1047 }
   1048 
   1049 
   1050 void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
   1051   LoadStoreHelper(instr, instr->ImmLS(), Offset);
   1052 }
   1053 
   1054 
   1055 void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
   1056   LoadStoreHelper(instr, instr->ImmLS(), PreIndex);
   1057 }
   1058 
   1059 
   1060 void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
   1061   LoadStoreHelper(instr, instr->ImmLS(), PostIndex);
   1062 }
   1063 
   1064 
   1065 void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
   1066   Extend ext = static_cast<Extend>(instr->ExtendMode());
   1067   VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
   1068   unsigned shift_amount = instr->ImmShiftLS() * instr->SizeLS();
   1069 
   1070   int64_t offset = ExtendValue(kXRegSize, xreg(instr->Rm()), ext,
   1071                                shift_amount);
   1072   LoadStoreHelper(instr, offset, Offset);
   1073 }
   1074 
   1075 
   1076 
   1077 void Simulator::LoadStoreHelper(const Instruction* instr,
   1078                                 int64_t offset,
   1079                                 AddrMode addrmode) {
   1080   unsigned srcdst = instr->Rt();
   1081   uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
   1082 
   1083   LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
   1084   switch (op) {
   1085     case LDRB_w:
   1086       set_wreg(srcdst, Memory::Read<uint8_t>(address), NoRegLog); break;
   1087     case LDRH_w:
   1088       set_wreg(srcdst, Memory::Read<uint16_t>(address), NoRegLog); break;
   1089     case LDR_w:
   1090       set_wreg(srcdst, Memory::Read<uint32_t>(address), NoRegLog); break;
   1091     case LDR_x:
   1092       set_xreg(srcdst, Memory::Read<uint64_t>(address), NoRegLog); break;
   1093     case LDRSB_w:
   1094       set_wreg(srcdst, Memory::Read<int8_t>(address), NoRegLog); break;
   1095     case LDRSH_w:
   1096       set_wreg(srcdst, Memory::Read<int16_t>(address), NoRegLog); break;
   1097     case LDRSB_x:
   1098       set_xreg(srcdst, Memory::Read<int8_t>(address), NoRegLog); break;
   1099     case LDRSH_x:
   1100       set_xreg(srcdst, Memory::Read<int16_t>(address), NoRegLog); break;
   1101     case LDRSW_x:
   1102       set_xreg(srcdst, Memory::Read<int32_t>(address), NoRegLog); break;
   1103     case LDR_b:
   1104       set_breg(srcdst, Memory::Read<uint8_t>(address), NoRegLog); break;
   1105     case LDR_h:
   1106       set_hreg(srcdst, Memory::Read<uint16_t>(address), NoRegLog); break;
   1107     case LDR_s:
   1108       set_sreg(srcdst, Memory::Read<float>(address), NoRegLog); break;
   1109     case LDR_d:
   1110       set_dreg(srcdst, Memory::Read<double>(address), NoRegLog); break;
   1111     case LDR_q:
   1112       set_qreg(srcdst, Memory::Read<qreg_t>(address), NoRegLog); break;
   1113 
   1114     case STRB_w:  Memory::Write<uint8_t>(address, wreg(srcdst)); break;
   1115     case STRH_w:  Memory::Write<uint16_t>(address, wreg(srcdst)); break;
   1116     case STR_w:   Memory::Write<uint32_t>(address, wreg(srcdst)); break;
   1117     case STR_x:   Memory::Write<uint64_t>(address, xreg(srcdst)); break;
   1118     case STR_b:   Memory::Write<uint8_t>(address, breg(srcdst)); break;
   1119     case STR_h:   Memory::Write<uint16_t>(address, hreg(srcdst)); break;
   1120     case STR_s:   Memory::Write<float>(address, sreg(srcdst)); break;
   1121     case STR_d:   Memory::Write<double>(address, dreg(srcdst)); break;
   1122     case STR_q:   Memory::Write<qreg_t>(address, qreg(srcdst)); break;
   1123 
   1124     // Ignore prfm hint instructions.
   1125     case PRFM: break;
   1126 
   1127     default: VIXL_UNIMPLEMENTED();
   1128   }
   1129 
   1130   unsigned access_size = 1 << instr->SizeLS();
   1131   if (instr->IsLoad()) {
   1132     if ((op == LDR_s) || (op == LDR_d)) {
   1133       LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
   1134     } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) {
   1135       LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
   1136     } else {
   1137       LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
   1138     }
   1139   } else {
   1140     if ((op == STR_s) || (op == STR_d)) {
   1141       LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
   1142     } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) {
   1143       LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
   1144     } else {
   1145       LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
   1146     }
   1147   }
   1148 
   1149   local_monitor_.MaybeClear();
   1150 }
   1151 
   1152 
   1153 void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
   1154   LoadStorePairHelper(instr, Offset);
   1155 }
   1156 
   1157 
   1158 void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
   1159   LoadStorePairHelper(instr, PreIndex);
   1160 }
   1161 
   1162 
   1163 void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
   1164   LoadStorePairHelper(instr, PostIndex);
   1165 }
   1166 
   1167 
   1168 void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
   1169   LoadStorePairHelper(instr, Offset);
   1170 }
   1171 
   1172 
   1173 void Simulator::LoadStorePairHelper(const Instruction* instr,
   1174                                     AddrMode addrmode) {
   1175   unsigned rt = instr->Rt();
   1176   unsigned rt2 = instr->Rt2();
   1177   int element_size = 1 << instr->SizeLSPair();
   1178   int64_t offset = instr->ImmLSPair() * element_size;
   1179   uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
   1180   uintptr_t address2 = address + element_size;
   1181 
   1182   LoadStorePairOp op =
   1183     static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));
   1184 
   1185   // 'rt' and 'rt2' can only be aliased for stores.
   1186   VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
   1187 
   1188   switch (op) {
   1189     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
   1190     // will print a more detailed log.
   1191     case LDP_w: {
   1192       set_wreg(rt, Memory::Read<uint32_t>(address), NoRegLog);
   1193       set_wreg(rt2, Memory::Read<uint32_t>(address2), NoRegLog);
   1194       break;
   1195     }
   1196     case LDP_s: {
   1197       set_sreg(rt, Memory::Read<float>(address), NoRegLog);
   1198       set_sreg(rt2, Memory::Read<float>(address2), NoRegLog);
   1199       break;
   1200     }
   1201     case LDP_x: {
   1202       set_xreg(rt, Memory::Read<uint64_t>(address), NoRegLog);
   1203       set_xreg(rt2, Memory::Read<uint64_t>(address2), NoRegLog);
   1204       break;
   1205     }
   1206     case LDP_d: {
   1207       set_dreg(rt, Memory::Read<double>(address), NoRegLog);
   1208       set_dreg(rt2, Memory::Read<double>(address2), NoRegLog);
   1209       break;
   1210     }
   1211     case LDP_q: {
   1212       set_qreg(rt, Memory::Read<qreg_t>(address), NoRegLog);
   1213       set_qreg(rt2, Memory::Read<qreg_t>(address2), NoRegLog);
   1214       break;
   1215     }
   1216     case LDPSW_x: {
   1217       set_xreg(rt, Memory::Read<int32_t>(address), NoRegLog);
   1218       set_xreg(rt2, Memory::Read<int32_t>(address2), NoRegLog);
   1219       break;
   1220     }
   1221     case STP_w: {
   1222       Memory::Write<uint32_t>(address, wreg(rt));
   1223       Memory::Write<uint32_t>(address2, wreg(rt2));
   1224       break;
   1225     }
   1226     case STP_s: {
   1227       Memory::Write<float>(address, sreg(rt));
   1228       Memory::Write<float>(address2, sreg(rt2));
   1229       break;
   1230     }
   1231     case STP_x: {
   1232       Memory::Write<uint64_t>(address, xreg(rt));
   1233       Memory::Write<uint64_t>(address2, xreg(rt2));
   1234       break;
   1235     }
   1236     case STP_d: {
   1237       Memory::Write<double>(address, dreg(rt));
   1238       Memory::Write<double>(address2, dreg(rt2));
   1239       break;
   1240     }
   1241     case STP_q: {
   1242       Memory::Write<qreg_t>(address, qreg(rt));
   1243       Memory::Write<qreg_t>(address2, qreg(rt2));
   1244       break;
   1245     }
   1246     default: VIXL_UNREACHABLE();
   1247   }
   1248 
   1249   // Print a detailed trace (including the memory address) instead of the basic
   1250   // register:value trace generated by set_*reg().
   1251   if (instr->IsLoad()) {
   1252     if ((op == LDP_s) || (op == LDP_d)) {
   1253       LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
   1254       LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
   1255     } else if (op == LDP_q) {
   1256       LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size));
   1257       LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
   1258     } else {
   1259       LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
   1260       LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
   1261     }
   1262   } else {
   1263     if ((op == STP_s) || (op == STP_d)) {
   1264       LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
   1265       LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
   1266     } else if (op == STP_q) {
   1267       LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
   1268       LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
   1269     } else {
   1270       LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
   1271       LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
   1272     }
   1273   }
   1274 
   1275   local_monitor_.MaybeClear();
   1276 }
   1277 
   1278 
   1279 void Simulator::PrintExclusiveAccessWarning() {
   1280   if (print_exclusive_access_warning_) {
   1281     fprintf(
   1282         stderr,
   1283         "%sWARNING:%s VIXL simulator support for load-/store-/clear-exclusive "
   1284         "instructions is limited. Refer to the README for details.%s\n",
   1285         clr_warning, clr_warning_message, clr_normal);
   1286     print_exclusive_access_warning_ = false;
   1287   }
   1288 }
   1289 
   1290 
   1291 void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
   1292   PrintExclusiveAccessWarning();
   1293 
   1294   unsigned rs = instr->Rs();
   1295   unsigned rt = instr->Rt();
   1296   unsigned rt2 = instr->Rt2();
   1297   unsigned rn = instr->Rn();
   1298 
   1299   LoadStoreExclusive op =
   1300       static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));
   1301 
   1302   bool is_acquire_release = instr->LdStXAcquireRelease();
   1303   bool is_exclusive = !instr->LdStXNotExclusive();
   1304   bool is_load = instr->LdStXLoad();
   1305   bool is_pair = instr->LdStXPair();
   1306 
   1307   unsigned element_size = 1 << instr->LdStXSizeLog2();
   1308   unsigned access_size = is_pair ? element_size * 2 : element_size;
   1309   uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
   1310 
   1311   // Verify that the address is available to the host.
   1312   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   1313 
   1314   // Check the alignment of `address`.
   1315   if (AlignDown(address, access_size) != address) {
   1316     VIXL_ALIGNMENT_EXCEPTION();
   1317   }
   1318 
   1319   // The sp must be aligned to 16 bytes when it is accessed.
   1320   if ((rn == 31) && (AlignDown(address, 16) != address)) {
   1321     VIXL_ALIGNMENT_EXCEPTION();
   1322   }
   1323 
   1324   if (is_load) {
   1325     if (is_exclusive) {
   1326       local_monitor_.MarkExclusive(address, access_size);
   1327     } else {
   1328       // Any non-exclusive load can clear the local monitor as a side effect. We
   1329       // don't need to do this, but it is useful to stress the simulated code.
   1330       local_monitor_.Clear();
   1331     }
   1332 
   1333     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
   1334     // will print a more detailed log.
   1335     switch (op) {
   1336       case LDXRB_w:
   1337       case LDAXRB_w:
   1338       case LDARB_w:
   1339         set_wreg(rt, Memory::Read<uint8_t>(address), NoRegLog);
   1340         break;
   1341       case LDXRH_w:
   1342       case LDAXRH_w:
   1343       case LDARH_w:
   1344         set_wreg(rt, Memory::Read<uint16_t>(address), NoRegLog);
   1345         break;
   1346       case LDXR_w:
   1347       case LDAXR_w:
   1348       case LDAR_w:
   1349         set_wreg(rt, Memory::Read<uint32_t>(address), NoRegLog);
   1350         break;
   1351       case LDXR_x:
   1352       case LDAXR_x:
   1353       case LDAR_x:
   1354         set_xreg(rt, Memory::Read<uint64_t>(address), NoRegLog);
   1355         break;
   1356       case LDXP_w:
   1357       case LDAXP_w:
   1358         set_wreg(rt, Memory::Read<uint32_t>(address), NoRegLog);
   1359         set_wreg(rt2, Memory::Read<uint32_t>(address + element_size), NoRegLog);
   1360         break;
   1361       case LDXP_x:
   1362       case LDAXP_x:
   1363         set_xreg(rt, Memory::Read<uint64_t>(address), NoRegLog);
   1364         set_xreg(rt2, Memory::Read<uint64_t>(address + element_size), NoRegLog);
   1365         break;
   1366       default:
   1367         VIXL_UNREACHABLE();
   1368     }
   1369 
   1370     if (is_acquire_release) {
   1371       // Approximate load-acquire by issuing a full barrier after the load.
   1372       __sync_synchronize();
   1373     }
   1374 
   1375     LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
   1376     if (is_pair) {
   1377       LogRead(address + element_size, rt2,
   1378               GetPrintRegisterFormatForSize(element_size));
   1379     }
   1380   } else {
   1381     if (is_acquire_release) {
   1382       // Approximate store-release by issuing a full barrier before the store.
   1383       __sync_synchronize();
   1384     }
   1385 
   1386     bool do_store = true;
   1387     if (is_exclusive) {
   1388       do_store = local_monitor_.IsExclusive(address, access_size) &&
   1389                  global_monitor_.IsExclusive(address, access_size);
   1390       set_wreg(rs, do_store ? 0 : 1);
   1391 
   1392       //  - All exclusive stores explicitly clear the local monitor.
   1393       local_monitor_.Clear();
   1394     } else {
   1395       //  - Any other store can clear the local monitor as a side effect.
   1396       local_monitor_.MaybeClear();
   1397     }
   1398 
   1399     if (do_store) {
   1400       switch (op) {
   1401         case STXRB_w:
   1402         case STLXRB_w:
   1403         case STLRB_w:
   1404           Memory::Write<uint8_t>(address, wreg(rt));
   1405           break;
   1406         case STXRH_w:
   1407         case STLXRH_w:
   1408         case STLRH_w:
   1409           Memory::Write<uint16_t>(address, wreg(rt));
   1410           break;
   1411         case STXR_w:
   1412         case STLXR_w:
   1413         case STLR_w:
   1414           Memory::Write<uint32_t>(address, wreg(rt));
   1415           break;
   1416         case STXR_x:
   1417         case STLXR_x:
   1418         case STLR_x:
   1419           Memory::Write<uint64_t>(address, xreg(rt));
   1420           break;
   1421         case STXP_w:
   1422         case STLXP_w:
   1423           Memory::Write<uint32_t>(address, wreg(rt));
   1424           Memory::Write<uint32_t>(address + element_size, wreg(rt2));
   1425           break;
   1426         case STXP_x:
   1427         case STLXP_x:
   1428           Memory::Write<uint64_t>(address, xreg(rt));
   1429           Memory::Write<uint64_t>(address + element_size, xreg(rt2));
   1430           break;
   1431         default:
   1432           VIXL_UNREACHABLE();
   1433       }
   1434 
   1435       LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
   1436       if (is_pair) {
   1437         LogWrite(address + element_size, rt2,
   1438                  GetPrintRegisterFormatForSize(element_size));
   1439       }
   1440     }
   1441   }
   1442 }
   1443 
   1444 
   1445 void Simulator::VisitLoadLiteral(const Instruction* instr) {
   1446   unsigned rt = instr->Rt();
   1447   uint64_t address = instr->LiteralAddress<uint64_t>();
   1448 
   1449   // Verify that the calculated address is available to the host.
   1450   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   1451 
   1452   switch (instr->Mask(LoadLiteralMask)) {
   1453     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
   1454     // print a more detailed log.
   1455     case LDR_w_lit:
   1456       set_wreg(rt, Memory::Read<uint32_t>(address), NoRegLog);
   1457       LogRead(address, rt, kPrintWReg);
   1458       break;
   1459     case LDR_x_lit:
   1460       set_xreg(rt, Memory::Read<uint64_t>(address), NoRegLog);
   1461       LogRead(address, rt, kPrintXReg);
   1462       break;
   1463     case LDR_s_lit:
   1464       set_sreg(rt, Memory::Read<float>(address), NoRegLog);
   1465       LogVRead(address, rt, kPrintSReg);
   1466       break;
   1467     case LDR_d_lit:
   1468       set_dreg(rt, Memory::Read<double>(address), NoRegLog);
   1469       LogVRead(address, rt, kPrintDReg);
   1470       break;
   1471     case LDR_q_lit:
   1472       set_qreg(rt, Memory::Read<qreg_t>(address), NoRegLog);
   1473       LogVRead(address, rt, kPrintReg1Q);
   1474       break;
   1475     case LDRSW_x_lit:
   1476       set_xreg(rt, Memory::Read<int32_t>(address), NoRegLog);
   1477       LogRead(address, rt, kPrintWReg);
   1478       break;
   1479 
   1480     // Ignore prfm hint instructions.
   1481     case PRFM_lit: break;
   1482 
   1483     default: VIXL_UNREACHABLE();
   1484   }
   1485 
   1486   local_monitor_.MaybeClear();
   1487 }
   1488 
   1489 
   1490 uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
   1491                                        int64_t offset,
   1492                                        AddrMode addrmode) {
   1493   uint64_t address = xreg(addr_reg, Reg31IsStackPointer);
   1494 
   1495   if ((addr_reg == 31) && ((address % 16) != 0)) {
   1496     // When the base register is SP the stack pointer is required to be
   1497     // quadword aligned prior to the address calculation and write-backs.
   1498     // Misalignment will cause a stack alignment fault.
   1499     VIXL_ALIGNMENT_EXCEPTION();
   1500   }
   1501 
   1502   if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
   1503     VIXL_ASSERT(offset != 0);
   1504     // Only preindex should log the register update here. For Postindex, the
   1505     // update will be printed automatically by LogWrittenRegisters _after_ the
   1506     // memory access itself is logged.
   1507     RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
   1508     set_xreg(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
   1509   }
   1510 
   1511   if ((addrmode == Offset) || (addrmode == PreIndex)) {
   1512     address += offset;
   1513   }
   1514 
   1515   // Verify that the calculated address is available to the host.
   1516   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   1517 
   1518   return static_cast<uintptr_t>(address);
   1519 }
   1520 
   1521 
   1522 void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
   1523   MoveWideImmediateOp mov_op =
   1524     static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
   1525   int64_t new_xn_val = 0;
   1526 
   1527   bool is_64_bits = instr->SixtyFourBits() == 1;
   1528   // Shift is limited for W operations.
   1529   VIXL_ASSERT(is_64_bits || (instr->ShiftMoveWide() < 2));
   1530 
   1531   // Get the shifted immediate.
   1532   int64_t shift = instr->ShiftMoveWide() * 16;
   1533   int64_t shifted_imm16 = static_cast<int64_t>(instr->ImmMoveWide()) << shift;
   1534 
   1535   // Compute the new value.
   1536   switch (mov_op) {
   1537     case MOVN_w:
   1538     case MOVN_x: {
   1539         new_xn_val = ~shifted_imm16;
   1540         if (!is_64_bits) new_xn_val &= kWRegMask;
   1541       break;
   1542     }
   1543     case MOVK_w:
   1544     case MOVK_x: {
   1545         unsigned reg_code = instr->Rd();
   1546         int64_t prev_xn_val = is_64_bits ? xreg(reg_code)
   1547                                          : wreg(reg_code);
   1548         new_xn_val =
   1549             (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
   1550       break;
   1551     }
   1552     case MOVZ_w:
   1553     case MOVZ_x: {
   1554         new_xn_val = shifted_imm16;
   1555       break;
   1556     }
   1557     default:
   1558       VIXL_UNREACHABLE();
   1559   }
   1560 
   1561   // Update the destination register.
   1562   set_xreg(instr->Rd(), new_xn_val);
   1563 }
   1564 
   1565 
   1566 void Simulator::VisitConditionalSelect(const Instruction* instr) {
   1567   uint64_t new_val = xreg(instr->Rn());
   1568 
   1569   if (ConditionFailed(static_cast<Condition>(instr->Condition()))) {
   1570     new_val = xreg(instr->Rm());
   1571     switch (instr->Mask(ConditionalSelectMask)) {
   1572       case CSEL_w:
   1573       case CSEL_x: break;
   1574       case CSINC_w:
   1575       case CSINC_x: new_val++; break;
   1576       case CSINV_w:
   1577       case CSINV_x: new_val = ~new_val; break;
   1578       case CSNEG_w:
   1579       case CSNEG_x: new_val = -new_val; break;
   1580       default: VIXL_UNIMPLEMENTED();
   1581     }
   1582   }
   1583   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   1584   set_reg(reg_size, instr->Rd(), new_val);
   1585 }
   1586 
   1587 
   1588 void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
   1589   unsigned dst = instr->Rd();
   1590   unsigned src = instr->Rn();
   1591 
   1592   switch (instr->Mask(DataProcessing1SourceMask)) {
   1593     case RBIT_w: set_wreg(dst, ReverseBits(wreg(src))); break;
   1594     case RBIT_x: set_xreg(dst, ReverseBits(xreg(src))); break;
   1595     case REV16_w: set_wreg(dst, ReverseBytes(wreg(src), 1)); break;
   1596     case REV16_x: set_xreg(dst, ReverseBytes(xreg(src), 1)); break;
   1597     case REV_w: set_wreg(dst, ReverseBytes(wreg(src), 2)); break;
   1598     case REV32_x: set_xreg(dst, ReverseBytes(xreg(src), 2)); break;
   1599     case REV_x: set_xreg(dst, ReverseBytes(xreg(src), 3)); break;
   1600     case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src))); break;
   1601     case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src))); break;
   1602     case CLS_w: {
   1603       set_wreg(dst, CountLeadingSignBits(wreg(src)));
   1604       break;
   1605     }
   1606     case CLS_x: {
   1607       set_xreg(dst, CountLeadingSignBits(xreg(src)));
   1608       break;
   1609     }
   1610     default: VIXL_UNIMPLEMENTED();
   1611   }
   1612 }
   1613 
   1614 
   1615 uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
   1616   VIXL_ASSERT((n > 32) && (n <= 64));
   1617   for (unsigned i = (n - 1); i >= 32; i--) {
   1618     if (((data >> i) & 1) != 0) {
   1619       uint64_t polysh32 = (uint64_t)poly << (i - 32);
   1620       uint64_t mask = (UINT64_C(1) << i) - 1;
   1621       data = ((data & mask) ^ polysh32);
   1622     }
   1623   }
   1624   return data & 0xffffffff;
   1625 }
   1626 
   1627 
   1628 template <typename T>
   1629 uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
   1630   unsigned size = sizeof(val) * 8;  // Number of bits in type T.
   1631   VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
   1632   uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
   1633   uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
   1634   return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
   1635 }
   1636 
   1637 
   1638 uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
   1639   // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute
   1640   // the CRC of each 32-bit word sequentially.
   1641   acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
   1642   return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
   1643 }
   1644 
   1645 
   1646 void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
   1647   Shift shift_op = NO_SHIFT;
   1648   int64_t result = 0;
   1649   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   1650 
   1651   switch (instr->Mask(DataProcessing2SourceMask)) {
   1652     case SDIV_w: {
   1653       int32_t rn = wreg(instr->Rn());
   1654       int32_t rm = wreg(instr->Rm());
   1655       if ((rn == kWMinInt) && (rm == -1)) {
   1656         result = kWMinInt;
   1657       } else if (rm == 0) {
   1658         // Division by zero can be trapped, but not on A-class processors.
   1659         result = 0;
   1660       } else {
   1661         result = rn / rm;
   1662       }
   1663       break;
   1664     }
   1665     case SDIV_x: {
   1666       int64_t rn = xreg(instr->Rn());
   1667       int64_t rm = xreg(instr->Rm());
   1668       if ((rn == kXMinInt) && (rm == -1)) {
   1669         result = kXMinInt;
   1670       } else if (rm == 0) {
   1671         // Division by zero can be trapped, but not on A-class processors.
   1672         result = 0;
   1673       } else {
   1674         result = rn / rm;
   1675       }
   1676       break;
   1677     }
   1678     case UDIV_w: {
   1679       uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn()));
   1680       uint32_t rm = static_cast<uint32_t>(wreg(instr->Rm()));
   1681       if (rm == 0) {
   1682         // Division by zero can be trapped, but not on A-class processors.
   1683         result = 0;
   1684       } else {
   1685         result = rn / rm;
   1686       }
   1687       break;
   1688     }
   1689     case UDIV_x: {
   1690       uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn()));
   1691       uint64_t rm = static_cast<uint64_t>(xreg(instr->Rm()));
   1692       if (rm == 0) {
   1693         // Division by zero can be trapped, but not on A-class processors.
   1694         result = 0;
   1695       } else {
   1696         result = rn / rm;
   1697       }
   1698       break;
   1699     }
   1700     case LSLV_w:
   1701     case LSLV_x: shift_op = LSL; break;
   1702     case LSRV_w:
   1703     case LSRV_x: shift_op = LSR; break;
   1704     case ASRV_w:
   1705     case ASRV_x: shift_op = ASR; break;
   1706     case RORV_w:
   1707     case RORV_x: shift_op = ROR; break;
   1708     case CRC32B: {
   1709       uint32_t acc = reg<uint32_t>(instr->Rn());
   1710       uint8_t  val = reg<uint8_t>(instr->Rm());
   1711       result = Crc32Checksum(acc, val, CRC32_POLY);
   1712       break;
   1713     }
   1714     case CRC32H: {
   1715       uint32_t acc = reg<uint32_t>(instr->Rn());
   1716       uint16_t val = reg<uint16_t>(instr->Rm());
   1717       result = Crc32Checksum(acc, val, CRC32_POLY);
   1718       break;
   1719     }
   1720     case CRC32W: {
   1721       uint32_t acc = reg<uint32_t>(instr->Rn());
   1722       uint32_t val = reg<uint32_t>(instr->Rm());
   1723       result = Crc32Checksum(acc, val, CRC32_POLY);
   1724       break;
   1725     }
   1726     case CRC32X: {
   1727       uint32_t acc = reg<uint32_t>(instr->Rn());
   1728       uint64_t val = reg<uint64_t>(instr->Rm());
   1729       result = Crc32Checksum(acc, val, CRC32_POLY);
   1730       reg_size = kWRegSize;
   1731       break;
   1732     }
   1733     case CRC32CB: {
   1734       uint32_t acc = reg<uint32_t>(instr->Rn());
   1735       uint8_t  val = reg<uint8_t>(instr->Rm());
   1736       result = Crc32Checksum(acc, val, CRC32C_POLY);
   1737       break;
   1738     }
   1739     case CRC32CH: {
   1740       uint32_t acc = reg<uint32_t>(instr->Rn());
   1741       uint16_t val = reg<uint16_t>(instr->Rm());
   1742       result = Crc32Checksum(acc, val, CRC32C_POLY);
   1743       break;
   1744     }
   1745     case CRC32CW: {
   1746       uint32_t acc = reg<uint32_t>(instr->Rn());
   1747       uint32_t val = reg<uint32_t>(instr->Rm());
   1748       result = Crc32Checksum(acc, val, CRC32C_POLY);
   1749       break;
   1750     }
   1751     case CRC32CX: {
   1752       uint32_t acc = reg<uint32_t>(instr->Rn());
   1753       uint64_t val = reg<uint64_t>(instr->Rm());
   1754       result = Crc32Checksum(acc, val, CRC32C_POLY);
   1755       reg_size = kWRegSize;
   1756       break;
   1757     }
   1758     default: VIXL_UNIMPLEMENTED();
   1759   }
   1760 
   1761   if (shift_op != NO_SHIFT) {
   1762     // Shift distance encoded in the least-significant five/six bits of the
   1763     // register.
   1764     int mask = (instr->SixtyFourBits() == 1) ? 0x3f : 0x1f;
   1765     unsigned shift = wreg(instr->Rm()) & mask;
   1766     result = ShiftOperand(reg_size, reg(reg_size, instr->Rn()), shift_op,
   1767                           shift);
   1768   }
   1769   set_reg(reg_size, instr->Rd(), result);
   1770 }
   1771 
   1772 
   1773 // The algorithm used is adapted from the one described in section 8.2 of
   1774 //   Hacker's Delight, by Henry S. Warren, Jr.
   1775 // It assumes that a right shift on a signed integer is an arithmetic shift.
   1776 // Type T must be either uint64_t or int64_t.
   1777 template <typename T>
   1778 static T MultiplyHigh(T u, T v) {
   1779   uint64_t u0, v0, w0;
   1780   T u1, v1, w1, w2, t;
   1781 
   1782   VIXL_ASSERT(sizeof(u) == sizeof(u0));
   1783 
   1784   u0 = u & 0xffffffff;
   1785   u1 = u >> 32;
   1786   v0 = v & 0xffffffff;
   1787   v1 = v >> 32;
   1788 
   1789   w0 = u0 * v0;
   1790   t = u1 * v0 + (w0 >> 32);
   1791   w1 = t & 0xffffffff;
   1792   w2 = t >> 32;
   1793   w1 = u0 * v1 + w1;
   1794 
   1795   return u1 * v1 + w2 + (w1 >> 32);
   1796 }
   1797 
   1798 
   1799 void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
   1800   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   1801 
   1802   int64_t result = 0;
   1803   // Extract and sign- or zero-extend 32-bit arguments for widening operations.
   1804   uint64_t rn_u32 = reg<uint32_t>(instr->Rn());
   1805   uint64_t rm_u32 = reg<uint32_t>(instr->Rm());
   1806   int64_t rn_s32 = reg<int32_t>(instr->Rn());
   1807   int64_t rm_s32 = reg<int32_t>(instr->Rm());
   1808   switch (instr->Mask(DataProcessing3SourceMask)) {
   1809     case MADD_w:
   1810     case MADD_x:
   1811       result = xreg(instr->Ra()) + (xreg(instr->Rn()) * xreg(instr->Rm()));
   1812       break;
   1813     case MSUB_w:
   1814     case MSUB_x:
   1815       result = xreg(instr->Ra()) - (xreg(instr->Rn()) * xreg(instr->Rm()));
   1816       break;
   1817     case SMADDL_x: result = xreg(instr->Ra()) + (rn_s32 * rm_s32); break;
   1818     case SMSUBL_x: result = xreg(instr->Ra()) - (rn_s32 * rm_s32); break;
   1819     case UMADDL_x: result = xreg(instr->Ra()) + (rn_u32 * rm_u32); break;
   1820     case UMSUBL_x: result = xreg(instr->Ra()) - (rn_u32 * rm_u32); break;
   1821     case UMULH_x:
   1822       result = MultiplyHigh(reg<uint64_t>(instr->Rn()),
   1823                             reg<uint64_t>(instr->Rm()));
   1824       break;
   1825     case SMULH_x:
   1826       result = MultiplyHigh(xreg(instr->Rn()), xreg(instr->Rm()));
   1827       break;
   1828     default: VIXL_UNIMPLEMENTED();
   1829   }
   1830   set_reg(reg_size, instr->Rd(), result);
   1831 }
   1832 
   1833 
   1834 void Simulator::VisitBitfield(const Instruction* instr) {
   1835   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   1836   int64_t reg_mask = instr->SixtyFourBits() ? kXRegMask : kWRegMask;
   1837   int64_t R = instr->ImmR();
   1838   int64_t S = instr->ImmS();
   1839   int64_t diff = S - R;
   1840   int64_t mask;
   1841   if (diff >= 0) {
   1842     mask = (diff < (reg_size - 1)) ? (INT64_C(1) << (diff + 1)) - 1
   1843                                    : reg_mask;
   1844   } else {
   1845     mask = (INT64_C(1) << (S + 1)) - 1;
   1846     mask = (static_cast<uint64_t>(mask) >> R) | (mask << (reg_size - R));
   1847     diff += reg_size;
   1848   }
   1849 
   1850   // inzero indicates if the extracted bitfield is inserted into the
   1851   // destination register value or in zero.
   1852   // If extend is true, extend the sign of the extracted bitfield.
   1853   bool inzero = false;
   1854   bool extend = false;
   1855   switch (instr->Mask(BitfieldMask)) {
   1856     case BFM_x:
   1857     case BFM_w:
   1858       break;
   1859     case SBFM_x:
   1860     case SBFM_w:
   1861       inzero = true;
   1862       extend = true;
   1863       break;
   1864     case UBFM_x:
   1865     case UBFM_w:
   1866       inzero = true;
   1867       break;
   1868     default:
   1869       VIXL_UNIMPLEMENTED();
   1870   }
   1871 
   1872   int64_t dst = inzero ? 0 : reg(reg_size, instr->Rd());
   1873   int64_t src = reg(reg_size, instr->Rn());
   1874   // Rotate source bitfield into place.
   1875   int64_t result = (static_cast<uint64_t>(src) >> R) | (src << (reg_size - R));
   1876   // Determine the sign extension.
   1877   int64_t topbits = ((INT64_C(1) << (reg_size - diff - 1)) - 1) << (diff + 1);
   1878   int64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;
   1879 
   1880   // Merge sign extension, dest/zero and bitfield.
   1881   result = signbits | (result & mask) | (dst & ~mask);
   1882 
   1883   set_reg(reg_size, instr->Rd(), result);
   1884 }
   1885 
   1886 
   1887 void Simulator::VisitExtract(const Instruction* instr) {
   1888   unsigned lsb = instr->ImmS();
   1889   unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize
   1890                                                     : kWRegSize;
   1891   uint64_t low_res = static_cast<uint64_t>(reg(reg_size, instr->Rm())) >> lsb;
   1892   uint64_t high_res =
   1893       (lsb == 0) ? 0 : reg(reg_size, instr->Rn()) << (reg_size - lsb);
   1894   set_reg(reg_size, instr->Rd(), low_res | high_res);
   1895 }
   1896 
   1897 
   1898 void Simulator::VisitFPImmediate(const Instruction* instr) {
   1899   AssertSupportedFPCR();
   1900 
   1901   unsigned dest = instr->Rd();
   1902   switch (instr->Mask(FPImmediateMask)) {
   1903     case FMOV_s_imm: set_sreg(dest, instr->ImmFP32()); break;
   1904     case FMOV_d_imm: set_dreg(dest, instr->ImmFP64()); break;
   1905     default: VIXL_UNREACHABLE();
   1906   }
   1907 }
   1908 
   1909 
   1910 void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
   1911   AssertSupportedFPCR();
   1912 
   1913   unsigned dst = instr->Rd();
   1914   unsigned src = instr->Rn();
   1915 
   1916   FPRounding round = RMode();
   1917 
   1918   switch (instr->Mask(FPIntegerConvertMask)) {
   1919     case FCVTAS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieAway)); break;
   1920     case FCVTAS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieAway)); break;
   1921     case FCVTAS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieAway)); break;
   1922     case FCVTAS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieAway)); break;
   1923     case FCVTAU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieAway)); break;
   1924     case FCVTAU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieAway)); break;
   1925     case FCVTAU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieAway)); break;
   1926     case FCVTAU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieAway)); break;
   1927     case FCVTMS_ws:
   1928       set_wreg(dst, FPToInt32(sreg(src), FPNegativeInfinity));
   1929       break;
   1930     case FCVTMS_xs:
   1931       set_xreg(dst, FPToInt64(sreg(src), FPNegativeInfinity));
   1932       break;
   1933     case FCVTMS_wd:
   1934       set_wreg(dst, FPToInt32(dreg(src), FPNegativeInfinity));
   1935       break;
   1936     case FCVTMS_xd:
   1937       set_xreg(dst, FPToInt64(dreg(src), FPNegativeInfinity));
   1938       break;
   1939     case FCVTMU_ws:
   1940       set_wreg(dst, FPToUInt32(sreg(src), FPNegativeInfinity));
   1941       break;
   1942     case FCVTMU_xs:
   1943       set_xreg(dst, FPToUInt64(sreg(src), FPNegativeInfinity));
   1944       break;
   1945     case FCVTMU_wd:
   1946       set_wreg(dst, FPToUInt32(dreg(src), FPNegativeInfinity));
   1947       break;
   1948     case FCVTMU_xd:
   1949       set_xreg(dst, FPToUInt64(dreg(src), FPNegativeInfinity));
   1950       break;
   1951     case FCVTPS_ws:
   1952       set_wreg(dst, FPToInt32(sreg(src), FPPositiveInfinity));
   1953       break;
   1954     case FCVTPS_xs:
   1955       set_xreg(dst, FPToInt64(sreg(src), FPPositiveInfinity));
   1956       break;
   1957     case FCVTPS_wd:
   1958       set_wreg(dst, FPToInt32(dreg(src), FPPositiveInfinity));
   1959       break;
   1960     case FCVTPS_xd:
   1961       set_xreg(dst, FPToInt64(dreg(src), FPPositiveInfinity));
   1962       break;
   1963     case FCVTPU_ws:
   1964       set_wreg(dst, FPToUInt32(sreg(src), FPPositiveInfinity));
   1965       break;
   1966     case FCVTPU_xs:
   1967       set_xreg(dst, FPToUInt64(sreg(src), FPPositiveInfinity));
   1968       break;
   1969     case FCVTPU_wd:
   1970       set_wreg(dst, FPToUInt32(dreg(src), FPPositiveInfinity));
   1971       break;
   1972     case FCVTPU_xd:
   1973       set_xreg(dst, FPToUInt64(dreg(src), FPPositiveInfinity));
   1974       break;
   1975     case FCVTNS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieEven)); break;
   1976     case FCVTNS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieEven)); break;
   1977     case FCVTNS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieEven)); break;
   1978     case FCVTNS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieEven)); break;
   1979     case FCVTNU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieEven)); break;
   1980     case FCVTNU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieEven)); break;
   1981     case FCVTNU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieEven)); break;
   1982     case FCVTNU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieEven)); break;
   1983     case FCVTZS_ws: set_wreg(dst, FPToInt32(sreg(src), FPZero)); break;
   1984     case FCVTZS_xs: set_xreg(dst, FPToInt64(sreg(src), FPZero)); break;
   1985     case FCVTZS_wd: set_wreg(dst, FPToInt32(dreg(src), FPZero)); break;
   1986     case FCVTZS_xd: set_xreg(dst, FPToInt64(dreg(src), FPZero)); break;
   1987     case FCVTZU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPZero)); break;
   1988     case FCVTZU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPZero)); break;
   1989     case FCVTZU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPZero)); break;
   1990     case FCVTZU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPZero)); break;
   1991     case FMOV_ws: set_wreg(dst, sreg_bits(src)); break;
   1992     case FMOV_xd: set_xreg(dst, dreg_bits(src)); break;
   1993     case FMOV_sw: set_sreg_bits(dst, wreg(src)); break;
   1994     case FMOV_dx: set_dreg_bits(dst, xreg(src)); break;
   1995     case FMOV_d1_x:
   1996       LogicVRegister(vreg(dst)).SetUint(kFormatD, 1, xreg(src));
   1997       break;
   1998     case FMOV_x_d1:
   1999       set_xreg(dst, LogicVRegister(vreg(src)).Uint(kFormatD, 1));
   2000       break;
   2001 
   2002     // A 32-bit input can be handled in the same way as a 64-bit input, since
   2003     // the sign- or zero-extension will not affect the conversion.
   2004     case SCVTF_dx: set_dreg(dst, FixedToDouble(xreg(src), 0, round)); break;
   2005     case SCVTF_dw: set_dreg(dst, FixedToDouble(wreg(src), 0, round)); break;
   2006     case UCVTF_dx: set_dreg(dst, UFixedToDouble(xreg(src), 0, round)); break;
   2007     case UCVTF_dw: {
   2008       set_dreg(dst, UFixedToDouble(static_cast<uint32_t>(wreg(src)), 0, round));
   2009       break;
   2010     }
   2011     case SCVTF_sx: set_sreg(dst, FixedToFloat(xreg(src), 0, round)); break;
   2012     case SCVTF_sw: set_sreg(dst, FixedToFloat(wreg(src), 0, round)); break;
   2013     case UCVTF_sx: set_sreg(dst, UFixedToFloat(xreg(src), 0, round)); break;
   2014     case UCVTF_sw: {
   2015       set_sreg(dst, UFixedToFloat(static_cast<uint32_t>(wreg(src)), 0, round));
   2016       break;
   2017     }
   2018 
   2019     default: VIXL_UNREACHABLE();
   2020   }
   2021 }
   2022 
   2023 
   2024 void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
   2025   AssertSupportedFPCR();
   2026 
   2027   unsigned dst = instr->Rd();
   2028   unsigned src = instr->Rn();
   2029   int fbits = 64 - instr->FPScale();
   2030 
   2031   FPRounding round = RMode();
   2032 
   2033   switch (instr->Mask(FPFixedPointConvertMask)) {
   2034     // A 32-bit input can be handled in the same way as a 64-bit input, since
   2035     // the sign- or zero-extension will not affect the conversion.
   2036     case SCVTF_dx_fixed:
   2037       set_dreg(dst, FixedToDouble(xreg(src), fbits, round));
   2038       break;
   2039     case SCVTF_dw_fixed:
   2040       set_dreg(dst, FixedToDouble(wreg(src), fbits, round));
   2041       break;
   2042     case UCVTF_dx_fixed:
   2043       set_dreg(dst, UFixedToDouble(xreg(src), fbits, round));
   2044       break;
   2045     case UCVTF_dw_fixed: {
   2046       set_dreg(dst,
   2047                UFixedToDouble(static_cast<uint32_t>(wreg(src)), fbits, round));
   2048       break;
   2049     }
   2050     case SCVTF_sx_fixed:
   2051       set_sreg(dst, FixedToFloat(xreg(src), fbits, round));
   2052       break;
   2053     case SCVTF_sw_fixed:
   2054       set_sreg(dst, FixedToFloat(wreg(src), fbits, round));
   2055       break;
   2056     case UCVTF_sx_fixed:
   2057       set_sreg(dst, UFixedToFloat(xreg(src), fbits, round));
   2058       break;
   2059     case UCVTF_sw_fixed: {
   2060       set_sreg(dst,
   2061                UFixedToFloat(static_cast<uint32_t>(wreg(src)), fbits, round));
   2062       break;
   2063     }
   2064     case FCVTZS_xd_fixed:
   2065       set_xreg(dst, FPToInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
   2066       break;
   2067     case FCVTZS_wd_fixed:
   2068       set_wreg(dst, FPToInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
   2069       break;
   2070     case FCVTZU_xd_fixed:
   2071       set_xreg(dst, FPToUInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
   2072       break;
   2073     case FCVTZU_wd_fixed:
   2074       set_wreg(dst, FPToUInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
   2075       break;
   2076     case FCVTZS_xs_fixed:
   2077       set_xreg(dst, FPToInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
   2078       break;
   2079     case FCVTZS_ws_fixed:
   2080       set_wreg(dst, FPToInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
   2081       break;
   2082     case FCVTZU_xs_fixed:
   2083       set_xreg(dst, FPToUInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
   2084       break;
   2085     case FCVTZU_ws_fixed:
   2086       set_wreg(dst, FPToUInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
   2087       break;
   2088     default: VIXL_UNREACHABLE();
   2089   }
   2090 }
   2091 
   2092 
   2093 void Simulator::VisitFPCompare(const Instruction* instr) {
   2094   AssertSupportedFPCR();
   2095 
   2096   FPTrapFlags trap = DisableTrap;
   2097   switch (instr->Mask(FPCompareMask)) {
   2098     case FCMPE_s: trap = EnableTrap; VIXL_FALLTHROUGH();
   2099     case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap); break;
   2100     case FCMPE_d: trap = EnableTrap; VIXL_FALLTHROUGH();
   2101     case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap); break;
   2102     case FCMPE_s_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
   2103     case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f, trap); break;
   2104     case FCMPE_d_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
   2105     case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0, trap); break;
   2106     default: VIXL_UNIMPLEMENTED();
   2107   }
   2108 }
   2109 
   2110 
   2111 void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
   2112   AssertSupportedFPCR();
   2113 
   2114   FPTrapFlags trap = DisableTrap;
   2115   switch (instr->Mask(FPConditionalCompareMask)) {
   2116     case FCCMPE_s: trap = EnableTrap;
   2117       VIXL_FALLTHROUGH();
   2118     case FCCMP_s:
   2119       if (ConditionPassed(instr->Condition())) {
   2120         FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap);
   2121       } else {
   2122         nzcv().SetFlags(instr->Nzcv());
   2123         LogSystemRegister(NZCV);
   2124       }
   2125       break;
   2126     case FCCMPE_d: trap = EnableTrap;
   2127       VIXL_FALLTHROUGH();
   2128     case FCCMP_d:
   2129       if (ConditionPassed(instr->Condition())) {
   2130         FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap);
   2131       } else {
   2132         nzcv().SetFlags(instr->Nzcv());
   2133         LogSystemRegister(NZCV);
   2134       }
   2135       break;
   2136     default: VIXL_UNIMPLEMENTED();
   2137   }
   2138 }
   2139 
   2140 
   2141 void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
   2142   AssertSupportedFPCR();
   2143 
   2144   Instr selected;
   2145   if (ConditionPassed(instr->Condition())) {
   2146     selected = instr->Rn();
   2147   } else {
   2148     selected = instr->Rm();
   2149   }
   2150 
   2151   switch (instr->Mask(FPConditionalSelectMask)) {
   2152     case FCSEL_s: set_sreg(instr->Rd(), sreg(selected)); break;
   2153     case FCSEL_d: set_dreg(instr->Rd(), dreg(selected)); break;
   2154     default: VIXL_UNIMPLEMENTED();
   2155   }
   2156 }
   2157 
   2158 
   2159 void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
   2160   AssertSupportedFPCR();
   2161 
   2162   FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
   2163   VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS;
   2164   SimVRegister& rd = vreg(instr->Rd());
   2165   SimVRegister& rn = vreg(instr->Rn());
   2166   bool inexact_exception = false;
   2167 
   2168   unsigned fd = instr->Rd();
   2169   unsigned fn = instr->Rn();
   2170 
   2171   switch (instr->Mask(FPDataProcessing1SourceMask)) {
   2172     case FMOV_s: set_sreg(fd, sreg(fn)); return;
   2173     case FMOV_d: set_dreg(fd, dreg(fn)); return;
   2174     case FABS_s: fabs_(kFormatS, vreg(fd), vreg(fn)); return;
   2175     case FABS_d: fabs_(kFormatD, vreg(fd), vreg(fn)); return;
   2176     case FNEG_s: fneg(kFormatS, vreg(fd), vreg(fn)); return;
   2177     case FNEG_d: fneg(kFormatD, vreg(fd), vreg(fn)); return;
   2178     case FCVT_ds: set_dreg(fd, FPToDouble(sreg(fn))); return;
   2179     case FCVT_sd: set_sreg(fd, FPToFloat(dreg(fn), FPTieEven)); return;
   2180     case FCVT_hs: set_hreg(fd, FPToFloat16(sreg(fn), FPTieEven)); return;
   2181     case FCVT_sh: set_sreg(fd, FPToFloat(hreg(fn))); return;
   2182     case FCVT_dh: set_dreg(fd, FPToDouble(FPToFloat(hreg(fn)))); return;
   2183     case FCVT_hd: set_hreg(fd, FPToFloat16(dreg(fn), FPTieEven)); return;
   2184     case FSQRT_s:
   2185     case FSQRT_d: fsqrt(vform, rd, rn); return;
   2186     case FRINTI_s:
   2187     case FRINTI_d: break;  // Use FPCR rounding mode.
   2188     case FRINTX_s:
   2189     case FRINTX_d: inexact_exception = true; break;
   2190     case FRINTA_s:
   2191     case FRINTA_d: fpcr_rounding = FPTieAway; break;
   2192     case FRINTM_s:
   2193     case FRINTM_d: fpcr_rounding = FPNegativeInfinity; break;
   2194     case FRINTN_s:
   2195     case FRINTN_d: fpcr_rounding = FPTieEven; break;
   2196     case FRINTP_s:
   2197     case FRINTP_d: fpcr_rounding = FPPositiveInfinity; break;
   2198     case FRINTZ_s:
   2199     case FRINTZ_d: fpcr_rounding = FPZero; break;
   2200     default: VIXL_UNIMPLEMENTED();
   2201   }
   2202 
   2203   // Only FRINT* instructions fall through the switch above.
   2204   frint(vform, rd, rn, fpcr_rounding, inexact_exception);
   2205 }
   2206 
   2207 
   2208 void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
   2209   AssertSupportedFPCR();
   2210 
   2211   VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS;
   2212   SimVRegister& rd = vreg(instr->Rd());
   2213   SimVRegister& rn = vreg(instr->Rn());
   2214   SimVRegister& rm = vreg(instr->Rm());
   2215 
   2216   switch (instr->Mask(FPDataProcessing2SourceMask)) {
   2217     case FADD_s:
   2218     case FADD_d: fadd(vform, rd, rn, rm); break;
   2219     case FSUB_s:
   2220     case FSUB_d: fsub(vform, rd, rn, rm); break;
   2221     case FMUL_s:
   2222     case FMUL_d: fmul(vform, rd, rn, rm); break;
   2223     case FNMUL_s:
   2224     case FNMUL_d: fnmul(vform, rd, rn, rm); break;
   2225     case FDIV_s:
   2226     case FDIV_d: fdiv(vform, rd, rn, rm); break;
   2227     case FMAX_s:
   2228     case FMAX_d: fmax(vform, rd, rn, rm); break;
   2229     case FMIN_s:
   2230     case FMIN_d: fmin(vform, rd, rn, rm); break;
   2231     case FMAXNM_s:
   2232     case FMAXNM_d: fmaxnm(vform, rd, rn, rm); break;
   2233     case FMINNM_s:
   2234     case FMINNM_d: fminnm(vform, rd, rn, rm); break;
   2235     default:
   2236       VIXL_UNREACHABLE();
   2237   }
   2238 }
   2239 
   2240 
   2241 void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
   2242   AssertSupportedFPCR();
   2243 
   2244   unsigned fd = instr->Rd();
   2245   unsigned fn = instr->Rn();
   2246   unsigned fm = instr->Rm();
   2247   unsigned fa = instr->Ra();
   2248 
   2249   switch (instr->Mask(FPDataProcessing3SourceMask)) {
   2250     // fd = fa +/- (fn * fm)
   2251     case FMADD_s: set_sreg(fd, FPMulAdd(sreg(fa), sreg(fn), sreg(fm))); break;
   2252     case FMSUB_s: set_sreg(fd, FPMulAdd(sreg(fa), -sreg(fn), sreg(fm))); break;
   2253     case FMADD_d: set_dreg(fd, FPMulAdd(dreg(fa), dreg(fn), dreg(fm))); break;
   2254     case FMSUB_d: set_dreg(fd, FPMulAdd(dreg(fa), -dreg(fn), dreg(fm))); break;
   2255     // Negated variants of the above.
   2256     case FNMADD_s:
   2257       set_sreg(fd, FPMulAdd(-sreg(fa), -sreg(fn), sreg(fm)));
   2258       break;
   2259     case FNMSUB_s:
   2260       set_sreg(fd, FPMulAdd(-sreg(fa), sreg(fn), sreg(fm)));
   2261       break;
   2262     case FNMADD_d:
   2263       set_dreg(fd, FPMulAdd(-dreg(fa), -dreg(fn), dreg(fm)));
   2264       break;
   2265     case FNMSUB_d:
   2266       set_dreg(fd, FPMulAdd(-dreg(fa), dreg(fn), dreg(fm)));
   2267       break;
   2268     default: VIXL_UNIMPLEMENTED();
   2269   }
   2270 }
   2271 
   2272 
   2273 bool Simulator::FPProcessNaNs(const Instruction* instr) {
   2274   unsigned fd = instr->Rd();
   2275   unsigned fn = instr->Rn();
   2276   unsigned fm = instr->Rm();
   2277   bool done = false;
   2278 
   2279   if (instr->Mask(FP64) == FP64) {
   2280     double result = FPProcessNaNs(dreg(fn), dreg(fm));
   2281     if (std::isnan(result)) {
   2282       set_dreg(fd, result);
   2283       done = true;
   2284     }
   2285   } else {
   2286     float result = FPProcessNaNs(sreg(fn), sreg(fm));
   2287     if (std::isnan(result)) {
   2288       set_sreg(fd, result);
   2289       done = true;
   2290     }
   2291   }
   2292 
   2293   return done;
   2294 }
   2295 
   2296 
   2297 void Simulator::SysOp_W(int op, int64_t val) {
   2298   switch (op) {
   2299     case IVAU:
   2300     case CVAC:
   2301     case CVAU:
   2302     case CIVAC: {
   2303       // Perform a dummy memory access to ensure that we have read access
   2304       // to the specified address.
   2305       volatile uint8_t y = Memory::Read<uint8_t>(val);
   2306       USE(y);
   2307       // TODO: Implement "case ZVA:".
   2308       break;
   2309     }
   2310     default:
   2311       VIXL_UNIMPLEMENTED();
   2312   }
   2313 }
   2314 
   2315 
   2316 void Simulator::VisitSystem(const Instruction* instr) {
   2317   // Some system instructions hijack their Op and Cp fields to represent a
   2318   // range of immediates instead of indicating a different instruction. This
   2319   // makes the decoding tricky.
   2320   if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
   2321     VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX);
   2322     switch (instr->Mask(SystemExclusiveMonitorMask)) {
   2323       case CLREX: {
   2324         PrintExclusiveAccessWarning();
   2325         ClearLocalMonitor();
   2326         break;
   2327       }
   2328     }
   2329   } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
   2330     switch (instr->Mask(SystemSysRegMask)) {
   2331       case MRS: {
   2332         switch (instr->ImmSystemRegister()) {
   2333           case NZCV: set_xreg(instr->Rt(), nzcv().RawValue()); break;
   2334           case FPCR: set_xreg(instr->Rt(), fpcr().RawValue()); break;
   2335           default: VIXL_UNIMPLEMENTED();
   2336         }
   2337         break;
   2338       }
   2339       case MSR: {
   2340         switch (instr->ImmSystemRegister()) {
   2341           case NZCV:
   2342             nzcv().SetRawValue(wreg(instr->Rt()));
   2343             LogSystemRegister(NZCV);
   2344             break;
   2345           case FPCR:
   2346             fpcr().SetRawValue(wreg(instr->Rt()));
   2347             LogSystemRegister(FPCR);
   2348             break;
   2349           default: VIXL_UNIMPLEMENTED();
   2350         }
   2351         break;
   2352       }
   2353     }
   2354   } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
   2355     VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
   2356     switch (instr->ImmHint()) {
   2357       case NOP: break;
   2358       default: VIXL_UNIMPLEMENTED();
   2359     }
   2360   } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
   2361     __sync_synchronize();
   2362   } else if ((instr->Mask(SystemSysFMask) == SystemSysFixed)) {
   2363     switch (instr->Mask(SystemSysMask)) {
   2364       case SYS: SysOp_W(instr->SysOp(), xreg(instr->Rt())); break;
   2365       default: VIXL_UNIMPLEMENTED();
   2366     }
   2367   } else {
   2368     VIXL_UNIMPLEMENTED();
   2369   }
   2370 }
   2371 
   2372 
   2373 void Simulator::VisitException(const Instruction* instr) {
   2374   switch (instr->Mask(ExceptionMask)) {
   2375     case HLT:
   2376       switch (instr->ImmException()) {
   2377         case kUnreachableOpcode:
   2378           DoUnreachable(instr);
   2379           return;
   2380         case kTraceOpcode:
   2381           DoTrace(instr);
   2382           return;
   2383         case kLogOpcode:
   2384           DoLog(instr);
   2385           return;
   2386         case kPrintfOpcode:
   2387           DoPrintf(instr);
   2388           return;
   2389         default:
   2390           HostBreakpoint();
   2391           return;
   2392       }
   2393     case BRK:
   2394       HostBreakpoint();
   2395       return;
   2396     default:
   2397       VIXL_UNIMPLEMENTED();
   2398   }
   2399 }
   2400 
   2401 
   2402 void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
   2403   VisitUnimplemented(instr);
   2404 }
   2405 
   2406 
   2407 void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
   2408   VisitUnimplemented(instr);
   2409 }
   2410 
   2411 
   2412 void Simulator::VisitCryptoAES(const Instruction* instr) {
   2413   VisitUnimplemented(instr);
   2414 }
   2415 
   2416 
   2417 void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
   2418   NEONFormatDecoder nfd(instr);
   2419   VectorFormat vf = nfd.GetVectorFormat();
   2420 
   2421   static const NEONFormatMap map_lp = {
   2422     {23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
   2423   };
   2424   VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
   2425 
   2426   static const NEONFormatMap map_fcvtl = {
   2427     {22}, {NF_4S, NF_2D}
   2428   };
   2429   VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
   2430 
   2431   static const NEONFormatMap map_fcvtn = {
   2432     {22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S}
   2433   };
   2434   VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
   2435 
   2436   SimVRegister& rd = vreg(instr->Rd());
   2437   SimVRegister& rn = vreg(instr->Rn());
   2438 
   2439   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
   2440     // These instructions all use a two bit size field, except NOT and RBIT,
   2441     // which use the field to encode the operation.
   2442     switch (instr->Mask(NEON2RegMiscMask)) {
   2443       case NEON_REV64:     rev64(vf, rd, rn); break;
   2444       case NEON_REV32:     rev32(vf, rd, rn); break;
   2445       case NEON_REV16:     rev16(vf, rd, rn); break;
   2446       case NEON_SUQADD:    suqadd(vf, rd, rn); break;
   2447       case NEON_USQADD:    usqadd(vf, rd, rn); break;
   2448       case NEON_CLS:       cls(vf, rd, rn); break;
   2449       case NEON_CLZ:       clz(vf, rd, rn); break;
   2450       case NEON_CNT:       cnt(vf, rd, rn); break;
   2451       case NEON_SQABS:     abs(vf, rd, rn).SignedSaturate(vf); break;
   2452       case NEON_SQNEG:     neg(vf, rd, rn).SignedSaturate(vf); break;
   2453       case NEON_CMGT_zero: cmp(vf, rd, rn, 0, gt); break;
   2454       case NEON_CMGE_zero: cmp(vf, rd, rn, 0, ge); break;
   2455       case NEON_CMEQ_zero: cmp(vf, rd, rn, 0, eq); break;
   2456       case NEON_CMLE_zero: cmp(vf, rd, rn, 0, le); break;
   2457       case NEON_CMLT_zero: cmp(vf, rd, rn, 0, lt); break;
   2458       case NEON_ABS:       abs(vf, rd, rn); break;
   2459       case NEON_NEG:       neg(vf, rd, rn); break;
   2460       case NEON_SADDLP:    saddlp(vf_lp, rd, rn); break;
   2461       case NEON_UADDLP:    uaddlp(vf_lp, rd, rn); break;
   2462       case NEON_SADALP:    sadalp(vf_lp, rd, rn); break;
   2463       case NEON_UADALP:    uadalp(vf_lp, rd, rn); break;
   2464       case NEON_RBIT_NOT:
   2465         vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
   2466         switch (instr->FPType()) {
   2467           case 0: not_(vf, rd, rn); break;
   2468           case 1: rbit(vf, rd, rn);; break;
   2469           default:
   2470             VIXL_UNIMPLEMENTED();
   2471         }
   2472         break;
   2473     }
   2474   } else {
   2475     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
   2476     FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
   2477     bool inexact_exception = false;
   2478 
   2479     // These instructions all use a one bit size field, except XTN, SQXTUN,
   2480     // SHLL, SQXTN and UQXTN, which use a two bit size field.
   2481     switch (instr->Mask(NEON2RegMiscFPMask)) {
   2482       case NEON_FABS:   fabs_(fpf, rd, rn); return;
   2483       case NEON_FNEG:   fneg(fpf, rd, rn); return;
   2484       case NEON_FSQRT:  fsqrt(fpf, rd, rn); return;
   2485       case NEON_FCVTL:
   2486         if (instr->Mask(NEON_Q)) {
   2487           fcvtl2(vf_fcvtl, rd, rn);
   2488         } else {
   2489           fcvtl(vf_fcvtl, rd, rn);
   2490         }
   2491         return;
   2492       case NEON_FCVTN:
   2493         if (instr->Mask(NEON_Q)) {
   2494           fcvtn2(vf_fcvtn, rd, rn);
   2495         } else {
   2496           fcvtn(vf_fcvtn, rd, rn);
   2497         }
   2498         return;
   2499       case NEON_FCVTXN:
   2500         if (instr->Mask(NEON_Q)) {
   2501           fcvtxn2(vf_fcvtn, rd, rn);
   2502         } else {
   2503           fcvtxn(vf_fcvtn, rd, rn);
   2504         }
   2505         return;
   2506 
   2507       // The following instructions break from the switch statement, rather
   2508       // than return.
   2509       case NEON_FRINTI:     break;  // Use FPCR rounding mode.
   2510       case NEON_FRINTX:     inexact_exception = true; break;
   2511       case NEON_FRINTA:     fpcr_rounding = FPTieAway; break;
   2512       case NEON_FRINTM:     fpcr_rounding = FPNegativeInfinity; break;
   2513       case NEON_FRINTN:     fpcr_rounding = FPTieEven; break;
   2514       case NEON_FRINTP:     fpcr_rounding = FPPositiveInfinity; break;
   2515       case NEON_FRINTZ:     fpcr_rounding = FPZero; break;
   2516 
   2517       case NEON_FCVTNS:     fcvts(fpf, rd, rn, FPTieEven); return;
   2518       case NEON_FCVTNU:     fcvtu(fpf, rd, rn, FPTieEven); return;
   2519       case NEON_FCVTPS:     fcvts(fpf, rd, rn, FPPositiveInfinity); return;
   2520       case NEON_FCVTPU:     fcvtu(fpf, rd, rn, FPPositiveInfinity); return;
   2521       case NEON_FCVTMS:     fcvts(fpf, rd, rn, FPNegativeInfinity); return;
   2522       case NEON_FCVTMU:     fcvtu(fpf, rd, rn, FPNegativeInfinity); return;
   2523       case NEON_FCVTZS:     fcvts(fpf, rd, rn, FPZero); return;
   2524       case NEON_FCVTZU:     fcvtu(fpf, rd, rn, FPZero); return;
   2525       case NEON_FCVTAS:     fcvts(fpf, rd, rn, FPTieAway); return;
   2526       case NEON_FCVTAU:     fcvtu(fpf, rd, rn, FPTieAway); return;
   2527       case NEON_SCVTF:      scvtf(fpf, rd, rn, 0, fpcr_rounding); return;
   2528       case NEON_UCVTF:      ucvtf(fpf, rd, rn, 0, fpcr_rounding); return;
   2529       case NEON_URSQRTE:    ursqrte(fpf, rd, rn); return;
   2530       case NEON_URECPE:     urecpe(fpf, rd, rn); return;
   2531       case NEON_FRSQRTE:    frsqrte(fpf, rd, rn); return;
   2532       case NEON_FRECPE:     frecpe(fpf, rd, rn, fpcr_rounding); return;
   2533       case NEON_FCMGT_zero: fcmp_zero(fpf, rd, rn, gt); return;
   2534       case NEON_FCMGE_zero: fcmp_zero(fpf, rd, rn, ge); return;
   2535       case NEON_FCMEQ_zero: fcmp_zero(fpf, rd, rn, eq); return;
   2536       case NEON_FCMLE_zero: fcmp_zero(fpf, rd, rn, le); return;
   2537       case NEON_FCMLT_zero: fcmp_zero(fpf, rd, rn, lt); return;
   2538       default:
   2539         if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
   2540             (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
   2541           switch (instr->Mask(NEON2RegMiscMask)) {
   2542             case NEON_XTN: xtn(vf, rd, rn); return;
   2543             case NEON_SQXTN: sqxtn(vf, rd, rn); return;
   2544             case NEON_UQXTN: uqxtn(vf, rd, rn); return;
   2545             case NEON_SQXTUN: sqxtun(vf, rd, rn); return;
   2546             case NEON_SHLL:
   2547               vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
   2548               if (instr->Mask(NEON_Q)) {
   2549                 shll2(vf, rd, rn);
   2550               } else {
   2551                 shll(vf, rd, rn);
   2552               }
   2553               return;
   2554             default:
   2555               VIXL_UNIMPLEMENTED();
   2556           }
   2557         } else {
   2558           VIXL_UNIMPLEMENTED();
   2559         }
   2560     }
   2561 
   2562     // Only FRINT* instructions fall through the switch above.
   2563     frint(fpf, rd, rn, fpcr_rounding, inexact_exception);
   2564   }
   2565 }
   2566 
   2567 
   2568 void Simulator::VisitNEON3Same(const Instruction* instr) {
   2569   NEONFormatDecoder nfd(instr);
   2570   SimVRegister& rd = vreg(instr->Rd());
   2571   SimVRegister& rn = vreg(instr->Rn());
   2572   SimVRegister& rm = vreg(instr->Rm());
   2573 
   2574   if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
   2575     VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
   2576     switch (instr->Mask(NEON3SameLogicalMask)) {
   2577       case NEON_AND: and_(vf, rd, rn, rm); break;
   2578       case NEON_ORR: orr(vf, rd, rn, rm); break;
   2579       case NEON_ORN: orn(vf, rd, rn, rm); break;
   2580       case NEON_EOR: eor(vf, rd, rn, rm); break;
   2581       case NEON_BIC: bic(vf, rd, rn, rm); break;
   2582       case NEON_BIF: bif(vf, rd, rn, rm); break;
   2583       case NEON_BIT: bit(vf, rd, rn, rm); break;
   2584       case NEON_BSL: bsl(vf, rd, rn, rm); break;
   2585       default:
   2586         VIXL_UNIMPLEMENTED();
   2587     }
   2588   } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
   2589     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
   2590     switch (instr->Mask(NEON3SameFPMask)) {
   2591       case NEON_FADD:    fadd(vf, rd, rn, rm); break;
   2592       case NEON_FSUB:    fsub(vf, rd, rn, rm); break;
   2593       case NEON_FMUL:    fmul(vf, rd, rn, rm); break;
   2594       case NEON_FDIV:    fdiv(vf, rd, rn, rm); break;
   2595       case NEON_FMAX:    fmax(vf, rd, rn, rm); break;
   2596       case NEON_FMIN:    fmin(vf, rd, rn, rm); break;
   2597       case NEON_FMAXNM:  fmaxnm(vf, rd, rn, rm); break;
   2598       case NEON_FMINNM:  fminnm(vf, rd, rn, rm); break;
   2599       case NEON_FMLA:    fmla(vf, rd, rn, rm); break;
   2600       case NEON_FMLS:    fmls(vf, rd, rn, rm); break;
   2601       case NEON_FMULX:   fmulx(vf, rd, rn, rm); break;
   2602       case NEON_FACGE:   fabscmp(vf, rd, rn, rm, ge); break;
   2603       case NEON_FACGT:   fabscmp(vf, rd, rn, rm, gt); break;
   2604       case NEON_FCMEQ:   fcmp(vf, rd, rn, rm, eq); break;
   2605       case NEON_FCMGE:   fcmp(vf, rd, rn, rm, ge); break;
   2606       case NEON_FCMGT:   fcmp(vf, rd, rn, rm, gt); break;
   2607       case NEON_FRECPS:  frecps(vf, rd, rn, rm); break;
   2608       case NEON_FRSQRTS: frsqrts(vf, rd, rn, rm); break;
   2609       case NEON_FABD:    fabd(vf, rd, rn, rm); break;
   2610       case NEON_FADDP:   faddp(vf, rd, rn, rm); break;
   2611       case NEON_FMAXP:   fmaxp(vf, rd, rn, rm); break;
   2612       case NEON_FMAXNMP: fmaxnmp(vf, rd, rn, rm); break;
   2613       case NEON_FMINP:   fminp(vf, rd, rn, rm); break;
   2614       case NEON_FMINNMP: fminnmp(vf, rd, rn, rm); break;
   2615       default:
   2616         VIXL_UNIMPLEMENTED();
   2617     }
   2618   } else {
   2619     VectorFormat vf = nfd.GetVectorFormat();
   2620     switch (instr->Mask(NEON3SameMask)) {
   2621       case NEON_ADD:   add(vf, rd, rn, rm);  break;
   2622       case NEON_ADDP:  addp(vf, rd, rn, rm); break;
   2623       case NEON_CMEQ:  cmp(vf, rd, rn, rm, eq); break;
   2624       case NEON_CMGE:  cmp(vf, rd, rn, rm, ge); break;
   2625       case NEON_CMGT:  cmp(vf, rd, rn, rm, gt); break;
   2626       case NEON_CMHI:  cmp(vf, rd, rn, rm, hi); break;
   2627       case NEON_CMHS:  cmp(vf, rd, rn, rm, hs); break;
   2628       case NEON_CMTST: cmptst(vf, rd, rn, rm); break;
   2629       case NEON_MLS:   mls(vf, rd, rn, rm); break;
   2630       case NEON_MLA:   mla(vf, rd, rn, rm); break;
   2631       case NEON_MUL:   mul(vf, rd, rn, rm); break;
   2632       case NEON_PMUL:  pmul(vf, rd, rn, rm); break;
   2633       case NEON_SMAX:  smax(vf, rd, rn, rm); break;
   2634       case NEON_SMAXP: smaxp(vf, rd, rn, rm); break;
   2635       case NEON_SMIN:  smin(vf, rd, rn, rm); break;
   2636       case NEON_SMINP: sminp(vf, rd, rn, rm); break;
   2637       case NEON_SUB:   sub(vf, rd, rn, rm);  break;
   2638       case NEON_UMAX:  umax(vf, rd, rn, rm); break;
   2639       case NEON_UMAXP: umaxp(vf, rd, rn, rm); break;
   2640       case NEON_UMIN:  umin(vf, rd, rn, rm); break;
   2641       case NEON_UMINP: uminp(vf, rd, rn, rm); break;
   2642       case NEON_SSHL:  sshl(vf, rd, rn, rm); break;
   2643       case NEON_USHL:  ushl(vf, rd, rn, rm); break;
   2644       case NEON_SABD:  absdiff(vf, rd, rn, rm, true); break;
   2645       case NEON_UABD:  absdiff(vf, rd, rn, rm, false); break;
   2646       case NEON_SABA:  saba(vf, rd, rn, rm); break;
   2647       case NEON_UABA:  uaba(vf, rd, rn, rm); break;
   2648       case NEON_UQADD: add(vf, rd, rn, rm).UnsignedSaturate(vf); break;
   2649       case NEON_SQADD: add(vf, rd, rn, rm).SignedSaturate(vf); break;
   2650       case NEON_UQSUB: sub(vf, rd, rn, rm).UnsignedSaturate(vf); break;
   2651       case NEON_SQSUB: sub(vf, rd, rn, rm).SignedSaturate(vf); break;
   2652       case NEON_SQDMULH:  sqdmulh(vf, rd, rn, rm); break;
   2653       case NEON_SQRDMULH: sqrdmulh(vf, rd, rn, rm); break;
   2654       case NEON_UQSHL: ushl(vf, rd, rn, rm).UnsignedSaturate(vf); break;
   2655       case NEON_SQSHL: sshl(vf, rd, rn, rm).SignedSaturate(vf); break;
   2656       case NEON_URSHL: ushl(vf, rd, rn, rm).Round(vf); break;
   2657       case NEON_SRSHL: sshl(vf, rd, rn, rm).Round(vf); break;
   2658       case NEON_UQRSHL:
   2659         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
   2660         break;
   2661       case NEON_SQRSHL:
   2662         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
   2663         break;
   2664       case NEON_UHADD:
   2665         add(vf, rd, rn, rm).Uhalve(vf);
   2666         break;
   2667       case NEON_URHADD:
   2668         add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
   2669         break;
   2670       case NEON_SHADD:
   2671         add(vf, rd, rn, rm).Halve(vf);
   2672         break;
   2673       case NEON_SRHADD:
   2674         add(vf, rd, rn, rm).Halve(vf).Round(vf);
   2675         break;
   2676       case NEON_UHSUB:
   2677         sub(vf, rd, rn, rm).Uhalve(vf);
   2678         break;
   2679       case NEON_SHSUB:
   2680         sub(vf, rd, rn, rm).Halve(vf);
   2681         break;
   2682       default:
   2683         VIXL_UNIMPLEMENTED();
   2684     }
   2685   }
   2686 }
   2687 
   2688 
   2689 void Simulator::VisitNEON3Different(const Instruction* instr) {
   2690   NEONFormatDecoder nfd(instr);
   2691   VectorFormat vf = nfd.GetVectorFormat();
   2692   VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
   2693 
   2694   SimVRegister& rd = vreg(instr->Rd());
   2695   SimVRegister& rn = vreg(instr->Rn());
   2696   SimVRegister& rm = vreg(instr->Rm());
   2697 
   2698   switch (instr->Mask(NEON3DifferentMask)) {
   2699     case NEON_PMULL:    pmull(vf_l, rd, rn, rm); break;
   2700     case NEON_PMULL2:   pmull2(vf_l, rd, rn, rm); break;
   2701     case NEON_UADDL:    uaddl(vf_l, rd, rn, rm); break;
   2702     case NEON_UADDL2:   uaddl2(vf_l, rd, rn, rm); break;
   2703     case NEON_SADDL:    saddl(vf_l, rd, rn, rm); break;
   2704     case NEON_SADDL2:   saddl2(vf_l, rd, rn, rm); break;
   2705     case NEON_USUBL:    usubl(vf_l, rd, rn, rm); break;
   2706     case NEON_USUBL2:   usubl2(vf_l, rd, rn, rm); break;
   2707     case NEON_SSUBL:    ssubl(vf_l, rd, rn, rm); break;
   2708     case NEON_SSUBL2:   ssubl2(vf_l, rd, rn, rm); break;
   2709     case NEON_SABAL:    sabal(vf_l, rd, rn, rm); break;
   2710     case NEON_SABAL2:   sabal2(vf_l, rd, rn, rm); break;
   2711     case NEON_UABAL:    uabal(vf_l, rd, rn, rm); break;
   2712     case NEON_UABAL2:   uabal2(vf_l, rd, rn, rm); break;
   2713     case NEON_SABDL:    sabdl(vf_l, rd, rn, rm); break;
   2714     case NEON_SABDL2:   sabdl2(vf_l, rd, rn, rm); break;
   2715     case NEON_UABDL:    uabdl(vf_l, rd, rn, rm); break;
   2716     case NEON_UABDL2:   uabdl2(vf_l, rd, rn, rm); break;
   2717     case NEON_SMLAL:    smlal(vf_l, rd, rn, rm); break;
   2718     case NEON_SMLAL2:   smlal2(vf_l, rd, rn, rm); break;
   2719     case NEON_UMLAL:    umlal(vf_l, rd, rn, rm); break;
   2720     case NEON_UMLAL2:   umlal2(vf_l, rd, rn, rm); break;
   2721     case NEON_SMLSL:    smlsl(vf_l, rd, rn, rm); break;
   2722     case NEON_SMLSL2:   smlsl2(vf_l, rd, rn, rm); break;
   2723     case NEON_UMLSL:    umlsl(vf_l, rd, rn, rm); break;
   2724     case NEON_UMLSL2:   umlsl2(vf_l, rd, rn, rm); break;
   2725     case NEON_SMULL:    smull(vf_l, rd, rn, rm); break;
   2726     case NEON_SMULL2:   smull2(vf_l, rd, rn, rm); break;
   2727     case NEON_UMULL:    umull(vf_l, rd, rn, rm); break;
   2728     case NEON_UMULL2:   umull2(vf_l, rd, rn, rm); break;
   2729     case NEON_SQDMLAL:  sqdmlal(vf_l, rd, rn, rm); break;
   2730     case NEON_SQDMLAL2: sqdmlal2(vf_l, rd, rn, rm); break;
   2731     case NEON_SQDMLSL:  sqdmlsl(vf_l, rd, rn, rm); break;
   2732     case NEON_SQDMLSL2: sqdmlsl2(vf_l, rd, rn, rm); break;
   2733     case NEON_SQDMULL:  sqdmull(vf_l, rd, rn, rm); break;
   2734     case NEON_SQDMULL2: sqdmull2(vf_l, rd, rn, rm); break;
   2735     case NEON_UADDW:    uaddw(vf_l, rd, rn, rm); break;
   2736     case NEON_UADDW2:   uaddw2(vf_l, rd, rn, rm); break;
   2737     case NEON_SADDW:    saddw(vf_l, rd, rn, rm); break;
   2738     case NEON_SADDW2:   saddw2(vf_l, rd, rn, rm); break;
   2739     case NEON_USUBW:    usubw(vf_l, rd, rn, rm); break;
   2740     case NEON_USUBW2:   usubw2(vf_l, rd, rn, rm); break;
   2741     case NEON_SSUBW:    ssubw(vf_l, rd, rn, rm); break;
   2742     case NEON_SSUBW2:   ssubw2(vf_l, rd, rn, rm); break;
   2743     case NEON_ADDHN:    addhn(vf, rd, rn, rm); break;
   2744     case NEON_ADDHN2:   addhn2(vf, rd, rn, rm); break;
   2745     case NEON_RADDHN:   raddhn(vf, rd, rn, rm); break;
   2746     case NEON_RADDHN2:  raddhn2(vf, rd, rn, rm); break;
   2747     case NEON_SUBHN:    subhn(vf, rd, rn, rm); break;
   2748     case NEON_SUBHN2:   subhn2(vf, rd, rn, rm); break;
   2749     case NEON_RSUBHN:   rsubhn(vf, rd, rn, rm); break;
   2750     case NEON_RSUBHN2:  rsubhn2(vf, rd, rn, rm); break;
   2751     default:
   2752       VIXL_UNIMPLEMENTED();
   2753   }
   2754 }
   2755 
   2756 
   2757 void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
   2758   NEONFormatDecoder nfd(instr);
   2759 
   2760   SimVRegister& rd = vreg(instr->Rd());
   2761   SimVRegister& rn = vreg(instr->Rn());
   2762 
   2763   // The input operand's VectorFormat is passed for these instructions.
   2764   if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
   2765     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
   2766 
   2767     switch (instr->Mask(NEONAcrossLanesFPMask)) {
   2768       case NEON_FMAXV: fmaxv(vf, rd, rn); break;
   2769       case NEON_FMINV: fminv(vf, rd, rn); break;
   2770       case NEON_FMAXNMV: fmaxnmv(vf, rd, rn); break;
   2771       case NEON_FMINNMV: fminnmv(vf, rd, rn); break;
   2772       default:
   2773         VIXL_UNIMPLEMENTED();
   2774     }
   2775   } else {
   2776     VectorFormat vf = nfd.GetVectorFormat();
   2777 
   2778     switch (instr->Mask(NEONAcrossLanesMask)) {
   2779       case NEON_ADDV:   addv(vf, rd, rn); break;
   2780       case NEON_SMAXV:  smaxv(vf, rd, rn); break;
   2781       case NEON_SMINV:  sminv(vf, rd, rn); break;
   2782       case NEON_UMAXV:  umaxv(vf, rd, rn); break;
   2783       case NEON_UMINV:  uminv(vf, rd, rn); break;
   2784       case NEON_SADDLV: saddlv(vf, rd, rn); break;
   2785       case NEON_UADDLV: uaddlv(vf, rd, rn); break;
   2786       default:
   2787         VIXL_UNIMPLEMENTED();
   2788     }
   2789   }
   2790 }
   2791 
   2792 
   2793 void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
   2794   NEONFormatDecoder nfd(instr);
   2795   VectorFormat vf_r = nfd.GetVectorFormat();
   2796   VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
   2797 
   2798   SimVRegister& rd = vreg(instr->Rd());
   2799   SimVRegister& rn = vreg(instr->Rn());
   2800 
   2801   ByElementOp Op = NULL;
   2802 
   2803   int rm_reg = instr->Rm();
   2804   int index = (instr->NEONH() << 1) | instr->NEONL();
   2805   if (instr->NEONSize() == 1) {
   2806     rm_reg &= 0xf;
   2807     index = (index << 1) | instr->NEONM();
   2808   }
   2809 
   2810   switch (instr->Mask(NEONByIndexedElementMask)) {
   2811     case NEON_MUL_byelement: Op = &Simulator::mul; vf = vf_r; break;
   2812     case NEON_MLA_byelement: Op = &Simulator::mla; vf = vf_r; break;
   2813     case NEON_MLS_byelement: Op = &Simulator::mls; vf = vf_r; break;
   2814     case NEON_SQDMULH_byelement: Op = &Simulator::sqdmulh; vf = vf_r; break;
   2815     case NEON_SQRDMULH_byelement: Op = &Simulator::sqrdmulh; vf = vf_r; break;
   2816     case NEON_SMULL_byelement:
   2817       if (instr->Mask(NEON_Q)) {
   2818         Op = &Simulator::smull2;
   2819       } else {
   2820         Op = &Simulator::smull;
   2821       }
   2822       break;
   2823     case NEON_UMULL_byelement:
   2824       if (instr->Mask(NEON_Q)) {
   2825         Op = &Simulator::umull2;
   2826       } else {
   2827         Op = &Simulator::umull;
   2828       }
   2829       break;
   2830     case NEON_SMLAL_byelement:
   2831       if (instr->Mask(NEON_Q)) {
   2832         Op = &Simulator::smlal2;
   2833       } else {
   2834         Op = &Simulator::smlal;
   2835       }
   2836       break;
   2837     case NEON_UMLAL_byelement:
   2838       if (instr->Mask(NEON_Q)) {
   2839         Op = &Simulator::umlal2;
   2840       } else {
   2841         Op = &Simulator::umlal;
   2842       }
   2843       break;
   2844     case NEON_SMLSL_byelement:
   2845       if (instr->Mask(NEON_Q)) {
   2846         Op = &Simulator::smlsl2;
   2847       } else {
   2848         Op = &Simulator::smlsl;
   2849       }
   2850       break;
   2851     case NEON_UMLSL_byelement:
   2852       if (instr->Mask(NEON_Q)) {
   2853         Op = &Simulator::umlsl2;
   2854       } else {
   2855         Op = &Simulator::umlsl;
   2856       }
   2857       break;
   2858     case NEON_SQDMULL_byelement:
   2859       if (instr->Mask(NEON_Q)) {
   2860         Op = &Simulator::sqdmull2;
   2861       } else {
   2862         Op = &Simulator::sqdmull;
   2863       }
   2864       break;
   2865     case NEON_SQDMLAL_byelement:
   2866       if (instr->Mask(NEON_Q)) {
   2867         Op = &Simulator::sqdmlal2;
   2868       } else {
   2869         Op = &Simulator::sqdmlal;
   2870       }
   2871       break;
   2872     case NEON_SQDMLSL_byelement:
   2873       if (instr->Mask(NEON_Q)) {
   2874         Op = &Simulator::sqdmlsl2;
   2875       } else {
   2876         Op = &Simulator::sqdmlsl;
   2877       }
   2878       break;
   2879     default:
   2880       index = instr->NEONH();
   2881       if ((instr->FPType() & 1) == 0) {
   2882         index = (index << 1) | instr->NEONL();
   2883       }
   2884 
   2885       vf = nfd.GetVectorFormat(nfd.FPFormatMap());
   2886 
   2887       switch (instr->Mask(NEONByIndexedElementFPMask)) {
   2888         case NEON_FMUL_byelement: Op = &Simulator::fmul; break;
   2889         case NEON_FMLA_byelement: Op = &Simulator::fmla; break;
   2890         case NEON_FMLS_byelement: Op = &Simulator::fmls; break;
   2891         case NEON_FMULX_byelement: Op = &Simulator::fmulx; break;
   2892         default: VIXL_UNIMPLEMENTED();
   2893       }
   2894   }
   2895 
   2896   (this->*Op)(vf, rd, rn, vreg(rm_reg), index);
   2897 }
   2898 
   2899 
   2900 void Simulator::VisitNEONCopy(const Instruction* instr) {
   2901   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
   2902   VectorFormat vf = nfd.GetVectorFormat();
   2903 
   2904   SimVRegister& rd = vreg(instr->Rd());
   2905   SimVRegister& rn = vreg(instr->Rn());
   2906   int imm5 = instr->ImmNEON5();
   2907   int tz = CountTrailingZeros(imm5, 32);
   2908   int reg_index = imm5 >> (tz + 1);
   2909 
   2910   if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
   2911     int imm4 = instr->ImmNEON4();
   2912     int rn_index = imm4 >> tz;
   2913     ins_element(vf, rd, reg_index, rn, rn_index);
   2914   } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
   2915     ins_immediate(vf, rd, reg_index, xreg(instr->Rn()));
   2916   } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
   2917     uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
   2918     value &= MaxUintFromFormat(vf);
   2919     set_xreg(instr->Rd(), value);
   2920   } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
   2921     int64_t value = LogicVRegister(rn).Int(vf, reg_index);
   2922     if (instr->NEONQ()) {
   2923       set_xreg(instr->Rd(), value);
   2924     } else {
   2925       set_wreg(instr->Rd(), (int32_t)value);
   2926     }
   2927   } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
   2928     dup_element(vf, rd, rn, reg_index);
   2929   } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
   2930     dup_immediate(vf, rd, xreg(instr->Rn()));
   2931   } else {
   2932     VIXL_UNIMPLEMENTED();
   2933   }
   2934 }
   2935 
   2936 
   2937 void Simulator::VisitNEONExtract(const Instruction* instr) {
   2938   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
   2939   VectorFormat vf = nfd.GetVectorFormat();
   2940   SimVRegister& rd = vreg(instr->Rd());
   2941   SimVRegister& rn = vreg(instr->Rn());
   2942   SimVRegister& rm = vreg(instr->Rm());
   2943   if (instr->Mask(NEONExtractMask) == NEON_EXT) {
   2944     int index = instr->ImmNEONExt();
   2945     ext(vf, rd, rn, rm, index);
   2946   } else {
   2947     VIXL_UNIMPLEMENTED();
   2948   }
   2949 }
   2950 
   2951 
   2952 void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
   2953                                                AddrMode addr_mode) {
   2954   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
   2955   VectorFormat vf = nfd.GetVectorFormat();
   2956 
   2957   uint64_t addr_base = xreg(instr->Rn(), Reg31IsStackPointer);
   2958   int reg_size = RegisterSizeInBytesFromFormat(vf);
   2959 
   2960   int reg[4];
   2961   uint64_t addr[4];
   2962   for (int i = 0; i < 4; i++) {
   2963     reg[i] = (instr->Rt() + i) % kNumberOfVRegisters;
   2964     addr[i] = addr_base + (i * reg_size);
   2965   }
   2966   int count = 1;
   2967   bool log_read = true;
   2968 
   2969   Instr itype = instr->Mask(NEONLoadStoreMultiStructMask);
   2970   if (((itype == NEON_LD1_1v) || (itype == NEON_LD1_2v) ||
   2971        (itype == NEON_LD1_3v) || (itype == NEON_LD1_4v) ||
   2972        (itype == NEON_ST1_1v) || (itype == NEON_ST1_2v) ||
   2973        (itype == NEON_ST1_3v) || (itype == NEON_ST1_4v)) &&
   2974       (instr->Bits(20, 16) != 0)) {
   2975     VIXL_UNREACHABLE();
   2976   }
   2977 
   2978   // We use the PostIndex mask here, as it works in this case for both Offset
   2979   // and PostIndex addressing.
   2980   switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
   2981     case NEON_LD1_4v:
   2982     case NEON_LD1_4v_post: ld1(vf, vreg(reg[3]), addr[3]); count++;
   2983       VIXL_FALLTHROUGH();
   2984     case NEON_LD1_3v:
   2985     case NEON_LD1_3v_post: ld1(vf, vreg(reg[2]), addr[2]); count++;
   2986       VIXL_FALLTHROUGH();
   2987     case NEON_LD1_2v:
   2988     case NEON_LD1_2v_post: ld1(vf, vreg(reg[1]), addr[1]); count++;
   2989       VIXL_FALLTHROUGH();
   2990     case NEON_LD1_1v:
   2991     case NEON_LD1_1v_post:
   2992       ld1(vf, vreg(reg[0]), addr[0]);
   2993       log_read = true;
   2994       break;
   2995     case NEON_ST1_4v:
   2996     case NEON_ST1_4v_post: st1(vf, vreg(reg[3]), addr[3]); count++;
   2997       VIXL_FALLTHROUGH();
   2998     case NEON_ST1_3v:
   2999     case NEON_ST1_3v_post: st1(vf, vreg(reg[2]), addr[2]); count++;
   3000       VIXL_FALLTHROUGH();
   3001     case NEON_ST1_2v:
   3002     case NEON_ST1_2v_post: st1(vf, vreg(reg[1]), addr[1]); count++;
   3003       VIXL_FALLTHROUGH();
   3004     case NEON_ST1_1v:
   3005     case NEON_ST1_1v_post:
   3006       st1(vf, vreg(reg[0]), addr[0]);
   3007       log_read = false;
   3008       break;
   3009     case NEON_LD2_post:
   3010     case NEON_LD2:
   3011       ld2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]);
   3012       count = 2;
   3013       break;
   3014     case NEON_ST2:
   3015     case NEON_ST2_post:
   3016       st2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]);
   3017       count = 2;
   3018       break;
   3019     case NEON_LD3_post:
   3020     case NEON_LD3:
   3021       ld3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]);
   3022       count = 3;
   3023       break;
   3024     case NEON_ST3:
   3025     case NEON_ST3_post:
   3026       st3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]);
   3027       count = 3;
   3028       break;
   3029     case NEON_ST4:
   3030     case NEON_ST4_post:
   3031       st4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]),
   3032           addr[0]);
   3033       count = 4;
   3034       break;
   3035     case NEON_LD4_post:
   3036     case NEON_LD4:
   3037       ld4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]),
   3038           addr[0]);
   3039       count = 4;
   3040       break;
   3041     default: VIXL_UNIMPLEMENTED();
   3042   }
   3043 
   3044   // Explicitly log the register update whilst we have type information.
   3045   for (int i = 0; i < count; i++) {
   3046     // For de-interleaving loads, only print the base address.
   3047     int lane_size = LaneSizeInBytesFromFormat(vf);
   3048     PrintRegisterFormat format = GetPrintRegisterFormatTryFP(
   3049         GetPrintRegisterFormatForSize(reg_size, lane_size));
   3050     if (log_read) {
   3051       LogVRead(addr_base, reg[i], format);
   3052     } else {
   3053       LogVWrite(addr_base, reg[i], format);
   3054     }
   3055   }
   3056 
   3057   if (addr_mode == PostIndex) {
   3058     int rm = instr->Rm();
   3059     // The immediate post index addressing mode is indicated by rm = 31.
   3060     // The immediate is implied by the number of vector registers used.
   3061     addr_base += (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count
   3062                             : xreg(rm);
   3063     set_xreg(instr->Rn(), addr_base);
   3064   } else {
   3065     VIXL_ASSERT(addr_mode == Offset);
   3066   }
   3067 }
   3068 
   3069 
   3070 void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
   3071   NEONLoadStoreMultiStructHelper(instr, Offset);
   3072 }
   3073 
   3074 
   3075 void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
   3076     const Instruction* instr) {
   3077   NEONLoadStoreMultiStructHelper(instr, PostIndex);
   3078 }
   3079 
   3080 
   3081 void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
   3082                                                 AddrMode addr_mode) {
   3083   uint64_t addr = xreg(instr->Rn(), Reg31IsStackPointer);
   3084   int rt = instr->Rt();
   3085 
   3086   Instr itype = instr->Mask(NEONLoadStoreSingleStructMask);
   3087   if (((itype == NEON_LD1_b) || (itype == NEON_LD1_h) ||
   3088        (itype == NEON_LD1_s) || (itype == NEON_LD1_d)) &&
   3089       (instr->Bits(20, 16) != 0)) {
   3090     VIXL_UNREACHABLE();
   3091   }
   3092 
   3093   // We use the PostIndex mask here, as it works in this case for both Offset
   3094   // and PostIndex addressing.
   3095   bool do_load = false;
   3096 
   3097   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
   3098   VectorFormat vf_t = nfd.GetVectorFormat();
   3099 
   3100   VectorFormat vf = kFormat16B;
   3101   switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
   3102     case NEON_LD1_b:
   3103     case NEON_LD1_b_post:
   3104     case NEON_LD2_b:
   3105     case NEON_LD2_b_post:
   3106     case NEON_LD3_b:
   3107     case NEON_LD3_b_post:
   3108     case NEON_LD4_b:
   3109     case NEON_LD4_b_post: do_load = true;
   3110       VIXL_FALLTHROUGH();
   3111     case NEON_ST1_b:
   3112     case NEON_ST1_b_post:
   3113     case NEON_ST2_b:
   3114     case NEON_ST2_b_post:
   3115     case NEON_ST3_b:
   3116     case NEON_ST3_b_post:
   3117     case NEON_ST4_b:
   3118     case NEON_ST4_b_post: break;
   3119 
   3120     case NEON_LD1_h:
   3121     case NEON_LD1_h_post:
   3122     case NEON_LD2_h:
   3123     case NEON_LD2_h_post:
   3124     case NEON_LD3_h:
   3125     case NEON_LD3_h_post:
   3126     case NEON_LD4_h:
   3127     case NEON_LD4_h_post: do_load = true;
   3128       VIXL_FALLTHROUGH();
   3129     case NEON_ST1_h:
   3130     case NEON_ST1_h_post:
   3131     case NEON_ST2_h:
   3132     case NEON_ST2_h_post:
   3133     case NEON_ST3_h:
   3134     case NEON_ST3_h_post:
   3135     case NEON_ST4_h:
   3136     case NEON_ST4_h_post: vf = kFormat8H; break;
   3137     case NEON_LD1_s:
   3138     case NEON_LD1_s_post:
   3139     case NEON_LD2_s:
   3140     case NEON_LD2_s_post:
   3141     case NEON_LD3_s:
   3142     case NEON_LD3_s_post:
   3143     case NEON_LD4_s:
   3144     case NEON_LD4_s_post: do_load = true;
   3145       VIXL_FALLTHROUGH();
   3146     case NEON_ST1_s:
   3147     case NEON_ST1_s_post:
   3148     case NEON_ST2_s:
   3149     case NEON_ST2_s_post:
   3150     case NEON_ST3_s:
   3151     case NEON_ST3_s_post:
   3152     case NEON_ST4_s:
   3153     case NEON_ST4_s_post: {
   3154       VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
   3155       VIXL_STATIC_ASSERT(
   3156           (NEON_LD1_s_post | (1 << NEONLSSize_offset)) == NEON_LD1_d_post);
   3157       VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
   3158       VIXL_STATIC_ASSERT(
   3159           (NEON_ST1_s_post | (1 << NEONLSSize_offset)) == NEON_ST1_d_post);
   3160       vf = ((instr->NEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
   3161       break;
   3162     }
   3163 
   3164     case NEON_LD1R:
   3165     case NEON_LD1R_post: {
   3166       vf = vf_t;
   3167       ld1r(vf, vreg(rt), addr);
   3168       do_load = true;
   3169       break;
   3170     }
   3171 
   3172     case NEON_LD2R:
   3173     case NEON_LD2R_post: {
   3174       vf = vf_t;
   3175       int rt2 = (rt + 1) % kNumberOfVRegisters;
   3176       ld2r(vf, vreg(rt), vreg(rt2), addr);
   3177       do_load = true;
   3178       break;
   3179     }
   3180 
   3181     case NEON_LD3R:
   3182     case NEON_LD3R_post: {
   3183       vf = vf_t;
   3184       int rt2 = (rt + 1) % kNumberOfVRegisters;
   3185       int rt3 = (rt2 + 1) % kNumberOfVRegisters;
   3186       ld3r(vf, vreg(rt), vreg(rt2), vreg(rt3), addr);
   3187       do_load = true;
   3188       break;
   3189     }
   3190 
   3191     case NEON_LD4R:
   3192     case NEON_LD4R_post: {
   3193       vf = vf_t;
   3194       int rt2 = (rt + 1) % kNumberOfVRegisters;
   3195       int rt3 = (rt2 + 1) % kNumberOfVRegisters;
   3196       int rt4 = (rt3 + 1) % kNumberOfVRegisters;
   3197       ld4r(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), addr);
   3198       do_load = true;
   3199       break;
   3200     }
   3201     default: VIXL_UNIMPLEMENTED();
   3202   }
   3203 
   3204   PrintRegisterFormat print_format =
   3205       GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
   3206   // Make sure that the print_format only includes a single lane.
   3207   print_format =
   3208       static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask);
   3209 
   3210   int esize = LaneSizeInBytesFromFormat(vf);
   3211   int index_shift = LaneSizeInBytesLog2FromFormat(vf);
   3212   int lane = instr->NEONLSIndex(index_shift);
   3213   int scale = 0;
   3214   int rt2 = (rt + 1) % kNumberOfVRegisters;
   3215   int rt3 = (rt2 + 1) % kNumberOfVRegisters;
   3216   int rt4 = (rt3 + 1) % kNumberOfVRegisters;
   3217   switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
   3218     case NEONLoadStoreSingle1:
   3219       scale = 1;
   3220       if (do_load) {
   3221         ld1(vf, vreg(rt), lane, addr);
   3222         LogVRead(addr, rt, print_format, lane);
   3223       } else {
   3224         st1(vf, vreg(rt), lane, addr);
   3225         LogVWrite(addr, rt, print_format, lane);
   3226       }
   3227       break;
   3228     case NEONLoadStoreSingle2:
   3229       scale = 2;
   3230       if (do_load) {
   3231         ld2(vf, vreg(rt), vreg(rt2), lane, addr);
   3232         LogVRead(addr, rt, print_format, lane);
   3233         LogVRead(addr + esize, rt2, print_format, lane);
   3234       } else {
   3235         st2(vf, vreg(rt), vreg(rt2), lane, addr);
   3236         LogVWrite(addr, rt, print_format, lane);
   3237         LogVWrite(addr + esize, rt2, print_format, lane);
   3238       }
   3239       break;
   3240     case NEONLoadStoreSingle3:
   3241       scale = 3;
   3242       if (do_load) {
   3243         ld3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr);
   3244         LogVRead(addr, rt, print_format, lane);
   3245         LogVRead(addr + esize, rt2, print_format, lane);
   3246         LogVRead(addr + (2 * esize), rt3, print_format, lane);
   3247       } else {
   3248         st3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr);
   3249         LogVWrite(addr, rt, print_format, lane);
   3250         LogVWrite(addr + esize, rt2, print_format, lane);
   3251         LogVWrite(addr + (2 * esize), rt3, print_format, lane);
   3252       }
   3253       break;
   3254     case NEONLoadStoreSingle4:
   3255       scale = 4;
   3256       if (do_load) {
   3257         ld4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr);
   3258         LogVRead(addr, rt, print_format, lane);
   3259         LogVRead(addr + esize, rt2, print_format, lane);
   3260         LogVRead(addr + (2 * esize), rt3, print_format, lane);
   3261         LogVRead(addr + (3 * esize), rt4, print_format, lane);
   3262       } else {
   3263         st4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr);
   3264         LogVWrite(addr, rt, print_format, lane);
   3265         LogVWrite(addr + esize, rt2, print_format, lane);
   3266         LogVWrite(addr + (2 * esize), rt3, print_format, lane);
   3267         LogVWrite(addr + (3 * esize), rt4, print_format, lane);
   3268       }
   3269       break;
   3270     default: VIXL_UNIMPLEMENTED();
   3271   }
   3272 
   3273   if (addr_mode == PostIndex) {
   3274     int rm = instr->Rm();
   3275     int lane_size = LaneSizeInBytesFromFormat(vf);
   3276     set_xreg(instr->Rn(), addr + ((rm == 31) ? (scale * lane_size) : xreg(rm)));
   3277   }
   3278 }
   3279 
   3280 
   3281 void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
   3282   NEONLoadStoreSingleStructHelper(instr, Offset);
   3283 }
   3284 
   3285 
   3286 void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
   3287     const Instruction* instr) {
   3288   NEONLoadStoreSingleStructHelper(instr, PostIndex);
   3289 }
   3290 
   3291 
   3292 void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
   3293   SimVRegister& rd = vreg(instr->Rd());
   3294   int cmode = instr->NEONCmode();
   3295   int cmode_3_1 = (cmode >> 1) & 7;
   3296   int cmode_3 = (cmode >> 3) & 1;
   3297   int cmode_2 = (cmode >> 2) & 1;
   3298   int cmode_1 = (cmode >> 1) & 1;
   3299   int cmode_0 = cmode & 1;
   3300   int q = instr->NEONQ();
   3301   int op_bit = instr->NEONModImmOp();
   3302   uint64_t imm8  = instr->ImmNEONabcdefgh();
   3303 
   3304   // Find the format and immediate value
   3305   uint64_t imm = 0;
   3306   VectorFormat vform = kFormatUndefined;
   3307   switch (cmode_3_1) {
   3308     case 0x0:
   3309     case 0x1:
   3310     case 0x2:
   3311     case 0x3:
   3312       vform = (q == 1) ? kFormat4S : kFormat2S;
   3313       imm = imm8 << (8 * cmode_3_1);
   3314       break;
   3315     case 0x4:
   3316     case 0x5:
   3317       vform = (q == 1) ? kFormat8H : kFormat4H;
   3318       imm = imm8 << (8 * cmode_1);
   3319       break;
   3320     case 0x6:
   3321       vform = (q == 1) ? kFormat4S : kFormat2S;
   3322       if (cmode_0 == 0) {
   3323         imm = imm8 << 8  | 0x000000ff;
   3324       } else {
   3325         imm = imm8 << 16 | 0x0000ffff;
   3326       }
   3327       break;
   3328     case 0x7:
   3329       if (cmode_0 == 0 && op_bit == 0) {
   3330         vform = q ? kFormat16B : kFormat8B;
   3331         imm = imm8;
   3332       } else if (cmode_0 == 0 && op_bit == 1) {
   3333         vform = q ? kFormat2D : kFormat1D;
   3334         imm = 0;
   3335         for (int i = 0; i < 8; ++i) {
   3336           if (imm8 & (1 << i)) {
   3337             imm |= (UINT64_C(0xff) << (8 * i));
   3338           }
   3339         }
   3340       } else {  // cmode_0 == 1, cmode == 0xf.
   3341         if (op_bit == 0) {
   3342           vform = q ? kFormat4S : kFormat2S;
   3343           imm = float_to_rawbits(instr->ImmNEONFP32());
   3344         } else if (q == 1) {
   3345           vform = kFormat2D;
   3346           imm = double_to_rawbits(instr->ImmNEONFP64());
   3347         } else {
   3348           VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
   3349           VisitUnallocated(instr);
   3350         }
   3351       }
   3352       break;
   3353     default: VIXL_UNREACHABLE(); break;
   3354   }
   3355 
   3356   // Find the operation
   3357   NEONModifiedImmediateOp op;
   3358   if (cmode_3 == 0) {
   3359     if (cmode_0 == 0) {
   3360       op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
   3361     } else {  // cmode<0> == '1'
   3362       op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
   3363     }
   3364   } else {  // cmode<3> == '1'
   3365     if (cmode_2 == 0) {
   3366       if (cmode_0 == 0) {
   3367         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
   3368       } else {  // cmode<0> == '1'
   3369         op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
   3370       }
   3371     } else {  // cmode<2> == '1'
   3372        if (cmode_1 == 0) {
   3373          op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
   3374        } else {  // cmode<1> == '1'
   3375          if (cmode_0 == 0) {
   3376            op = NEONModifiedImmediate_MOVI;
   3377          } else {  // cmode<0> == '1'
   3378            op = NEONModifiedImmediate_MOVI;
   3379          }
   3380        }
   3381     }
   3382   }
   3383 
   3384   // Call the logic function
   3385   if (op == NEONModifiedImmediate_ORR) {
   3386     orr(vform, rd, rd, imm);
   3387   } else if (op == NEONModifiedImmediate_BIC) {
   3388     bic(vform, rd, rd, imm);
   3389   } else  if (op == NEONModifiedImmediate_MOVI) {
   3390     movi(vform, rd, imm);
   3391   } else if (op == NEONModifiedImmediate_MVNI) {
   3392     mvni(vform, rd, imm);
   3393   } else {
   3394     VisitUnimplemented(instr);
   3395   }
   3396 }
   3397 
   3398 
   3399 void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
   3400   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
   3401   VectorFormat vf = nfd.GetVectorFormat();
   3402 
   3403   SimVRegister& rd = vreg(instr->Rd());
   3404   SimVRegister& rn = vreg(instr->Rn());
   3405 
   3406   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
   3407     // These instructions all use a two bit size field, except NOT and RBIT,
   3408     // which use the field to encode the operation.
   3409     switch (instr->Mask(NEONScalar2RegMiscMask)) {
   3410       case NEON_CMEQ_zero_scalar: cmp(vf, rd, rn, 0, eq); break;
   3411       case NEON_CMGE_zero_scalar: cmp(vf, rd, rn, 0, ge); break;
   3412       case NEON_CMGT_zero_scalar: cmp(vf, rd, rn, 0, gt); break;
   3413       case NEON_CMLT_zero_scalar: cmp(vf, rd, rn, 0, lt); break;
   3414       case NEON_CMLE_zero_scalar: cmp(vf, rd, rn, 0, le); break;
   3415       case NEON_ABS_scalar:       abs(vf, rd, rn); break;
   3416       case NEON_SQABS_scalar:     abs(vf, rd, rn).SignedSaturate(vf); break;
   3417       case NEON_NEG_scalar:       neg(vf, rd, rn); break;
   3418       case NEON_SQNEG_scalar:     neg(vf, rd, rn).SignedSaturate(vf); break;
   3419       case NEON_SUQADD_scalar:    suqadd(vf, rd, rn); break;
   3420       case NEON_USQADD_scalar:    usqadd(vf, rd, rn); break;
   3421       default: VIXL_UNIMPLEMENTED(); break;
   3422     }
   3423   } else {
   3424     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
   3425     FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
   3426 
   3427     // These instructions all use a one bit size field, except SQXTUN, SQXTN
   3428     // and UQXTN, which use a two bit size field.
   3429     switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
   3430       case NEON_FRECPE_scalar:     frecpe(fpf, rd, rn, fpcr_rounding); break;
   3431       case NEON_FRECPX_scalar:     frecpx(fpf, rd, rn); break;
   3432       case NEON_FRSQRTE_scalar:    frsqrte(fpf, rd, rn); break;
   3433       case NEON_FCMGT_zero_scalar: fcmp_zero(fpf, rd, rn, gt); break;
   3434       case NEON_FCMGE_zero_scalar: fcmp_zero(fpf, rd, rn, ge); break;
   3435       case NEON_FCMEQ_zero_scalar: fcmp_zero(fpf, rd, rn, eq); break;
   3436       case NEON_FCMLE_zero_scalar: fcmp_zero(fpf, rd, rn, le); break;
   3437       case NEON_FCMLT_zero_scalar: fcmp_zero(fpf, rd, rn, lt); break;
   3438       case NEON_SCVTF_scalar:      scvtf(fpf, rd, rn, 0, fpcr_rounding); break;
   3439       case NEON_UCVTF_scalar:      ucvtf(fpf, rd, rn, 0, fpcr_rounding); break;
   3440       case NEON_FCVTNS_scalar: fcvts(fpf, rd, rn, FPTieEven); break;
   3441       case NEON_FCVTNU_scalar: fcvtu(fpf, rd, rn, FPTieEven); break;
   3442       case NEON_FCVTPS_scalar: fcvts(fpf, rd, rn, FPPositiveInfinity); break;
   3443       case NEON_FCVTPU_scalar: fcvtu(fpf, rd, rn, FPPositiveInfinity); break;
   3444       case NEON_FCVTMS_scalar: fcvts(fpf, rd, rn, FPNegativeInfinity); break;
   3445       case NEON_FCVTMU_scalar: fcvtu(fpf, rd, rn, FPNegativeInfinity); break;
   3446       case NEON_FCVTZS_scalar: fcvts(fpf, rd, rn, FPZero); break;
   3447       case NEON_FCVTZU_scalar: fcvtu(fpf, rd, rn, FPZero); break;
   3448       case NEON_FCVTAS_scalar: fcvts(fpf, rd, rn, FPTieAway); break;
   3449       case NEON_FCVTAU_scalar: fcvtu(fpf, rd, rn, FPTieAway); break;
   3450       case NEON_FCVTXN_scalar:
   3451         // Unlike all of the other FP instructions above, fcvtxn encodes dest
   3452         // size S as size<0>=1. There's only one case, so we ignore the form.
   3453         VIXL_ASSERT(instr->Bit(22) == 1);
   3454         fcvtxn(kFormatS, rd, rn);
   3455         break;
   3456       default:
   3457         switch (instr->Mask(NEONScalar2RegMiscMask)) {
   3458           case NEON_SQXTN_scalar:  sqxtn(vf, rd, rn); break;
   3459           case NEON_UQXTN_scalar:  uqxtn(vf, rd, rn); break;
   3460           case NEON_SQXTUN_scalar: sqxtun(vf, rd, rn); break;
   3461           default:
   3462             VIXL_UNIMPLEMENTED();
   3463         }
   3464     }
   3465   }
   3466 }
   3467 
   3468 
   3469 void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
   3470   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
   3471   VectorFormat vf = nfd.GetVectorFormat();
   3472 
   3473   SimVRegister& rd = vreg(instr->Rd());
   3474   SimVRegister& rn = vreg(instr->Rn());
   3475   SimVRegister& rm = vreg(instr->Rm());
   3476   switch (instr->Mask(NEONScalar3DiffMask)) {
   3477     case NEON_SQDMLAL_scalar: sqdmlal(vf, rd, rn, rm); break;
   3478     case NEON_SQDMLSL_scalar: sqdmlsl(vf, rd, rn, rm); break;
   3479     case NEON_SQDMULL_scalar: sqdmull(vf, rd, rn, rm); break;
   3480     default:
   3481       VIXL_UNIMPLEMENTED();
   3482   }
   3483 }
   3484 
   3485 
   3486 void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
   3487   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
   3488   VectorFormat vf = nfd.GetVectorFormat();
   3489 
   3490   SimVRegister& rd = vreg(instr->Rd());
   3491   SimVRegister& rn = vreg(instr->Rn());
   3492   SimVRegister& rm = vreg(instr->Rm());
   3493 
   3494   if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
   3495     vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
   3496     switch (instr->Mask(NEONScalar3SameFPMask)) {
   3497       case NEON_FMULX_scalar:   fmulx(vf, rd, rn, rm); break;
   3498       case NEON_FACGE_scalar:   fabscmp(vf, rd, rn, rm, ge); break;
   3499       case NEON_FACGT_scalar:   fabscmp(vf, rd, rn, rm, gt); break;
   3500       case NEON_FCMEQ_scalar:   fcmp(vf, rd, rn, rm, eq); break;
   3501       case NEON_FCMGE_scalar:   fcmp(vf, rd, rn, rm, ge); break;
   3502       case NEON_FCMGT_scalar:   fcmp(vf, rd, rn, rm, gt); break;
   3503       case NEON_FRECPS_scalar:  frecps(vf, rd, rn, rm); break;
   3504       case NEON_FRSQRTS_scalar: frsqrts(vf, rd, rn, rm); break;
   3505       case NEON_FABD_scalar:    fabd(vf, rd, rn, rm); break;
   3506       default:
   3507         VIXL_UNIMPLEMENTED();
   3508     }
   3509   } else {
   3510     switch (instr->Mask(NEONScalar3SameMask)) {
   3511       case NEON_ADD_scalar:      add(vf, rd, rn, rm); break;
   3512       case NEON_SUB_scalar:      sub(vf, rd, rn, rm); break;
   3513       case NEON_CMEQ_scalar:     cmp(vf, rd, rn, rm, eq); break;
   3514       case NEON_CMGE_scalar:     cmp(vf, rd, rn, rm, ge); break;
   3515       case NEON_CMGT_scalar:     cmp(vf, rd, rn, rm, gt); break;
   3516       case NEON_CMHI_scalar:     cmp(vf, rd, rn, rm, hi); break;
   3517       case NEON_CMHS_scalar:     cmp(vf, rd, rn, rm, hs); break;
   3518       case NEON_CMTST_scalar:    cmptst(vf, rd, rn, rm); break;
   3519       case NEON_USHL_scalar:     ushl(vf, rd, rn, rm); break;
   3520       case NEON_SSHL_scalar:     sshl(vf, rd, rn, rm); break;
   3521       case NEON_SQDMULH_scalar:  sqdmulh(vf, rd, rn, rm); break;
   3522       case NEON_SQRDMULH_scalar: sqrdmulh(vf, rd, rn, rm); break;
   3523       case NEON_UQADD_scalar:
   3524         add(vf, rd, rn, rm).UnsignedSaturate(vf);
   3525         break;
   3526       case NEON_SQADD_scalar:
   3527         add(vf, rd, rn, rm).SignedSaturate(vf);
   3528         break;
   3529       case NEON_UQSUB_scalar:
   3530         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
   3531         break;
   3532       case NEON_SQSUB_scalar:
   3533         sub(vf, rd, rn, rm).SignedSaturate(vf);
   3534         break;
   3535       case NEON_UQSHL_scalar:
   3536         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
   3537         break;
   3538       case NEON_SQSHL_scalar:
   3539         sshl(vf, rd, rn, rm).SignedSaturate(vf);
   3540         break;
   3541       case NEON_URSHL_scalar:
   3542         ushl(vf, rd, rn, rm).Round(vf);
   3543         break;
   3544       case NEON_SRSHL_scalar:
   3545         sshl(vf, rd, rn, rm).Round(vf);
   3546         break;
   3547       case NEON_UQRSHL_scalar:
   3548         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
   3549         break;
   3550       case NEON_SQRSHL_scalar:
   3551         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
   3552         break;
   3553       default:
   3554         VIXL_UNIMPLEMENTED();
   3555     }
   3556   }
   3557 }
   3558 
   3559 
   3560 void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
   3561   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
   3562   VectorFormat vf = nfd.GetVectorFormat();
   3563   VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
   3564 
   3565   SimVRegister& rd = vreg(instr->Rd());
   3566   SimVRegister& rn = vreg(instr->Rn());
   3567   ByElementOp Op = NULL;
   3568 
   3569   int rm_reg = instr->Rm();
   3570   int index = (instr->NEONH() << 1) | instr->NEONL();
   3571   if (instr->NEONSize() == 1) {
   3572     rm_reg &= 0xf;
   3573     index = (index << 1) | instr->NEONM();
   3574   }
   3575 
   3576   switch (instr->Mask(NEONScalarByIndexedElementMask)) {
   3577     case NEON_SQDMULL_byelement_scalar: Op = &Simulator::sqdmull; break;
   3578     case NEON_SQDMLAL_byelement_scalar: Op = &Simulator::sqdmlal; break;
   3579     case NEON_SQDMLSL_byelement_scalar: Op = &Simulator::sqdmlsl; break;
   3580     case NEON_SQDMULH_byelement_scalar:
   3581       Op = &Simulator::sqdmulh;
   3582       vf = vf_r;
   3583       break;
   3584     case NEON_SQRDMULH_byelement_scalar:
   3585       Op = &Simulator::sqrdmulh;
   3586       vf = vf_r;
   3587       break;
   3588     default:
   3589       vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
   3590       index = instr->NEONH();
   3591       if ((instr->FPType() & 1) == 0) {
   3592         index = (index << 1) | instr->NEONL();
   3593       }
   3594       switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
   3595         case NEON_FMUL_byelement_scalar: Op = &Simulator::fmul; break;
   3596         case NEON_FMLA_byelement_scalar: Op = &Simulator::fmla; break;
   3597         case NEON_FMLS_byelement_scalar: Op = &Simulator::fmls; break;
   3598         case NEON_FMULX_byelement_scalar: Op = &Simulator::fmulx; break;
   3599         default: VIXL_UNIMPLEMENTED();
   3600       }
   3601   }
   3602 
   3603   (this->*Op)(vf, rd, rn, vreg(rm_reg), index);
   3604 }
   3605 
   3606 
   3607 void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
   3608   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
   3609   VectorFormat vf = nfd.GetVectorFormat();
   3610 
   3611   SimVRegister& rd = vreg(instr->Rd());
   3612   SimVRegister& rn = vreg(instr->Rn());
   3613 
   3614   if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
   3615     int imm5 = instr->ImmNEON5();
   3616     int tz = CountTrailingZeros(imm5, 32);
   3617     int rn_index = imm5 >> (tz + 1);
   3618     dup_element(vf, rd, rn, rn_index);
   3619   } else {
   3620     VIXL_UNIMPLEMENTED();
   3621   }
   3622 }
   3623 
   3624 
   3625 void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
   3626   NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap());
   3627   VectorFormat vf = nfd.GetVectorFormat();
   3628 
   3629   SimVRegister& rd = vreg(instr->Rd());
   3630   SimVRegister& rn = vreg(instr->Rn());
   3631   switch (instr->Mask(NEONScalarPairwiseMask)) {
   3632     case NEON_ADDP_scalar:    addp(vf, rd, rn); break;
   3633     case NEON_FADDP_scalar:   faddp(vf, rd, rn); break;
   3634     case NEON_FMAXP_scalar:   fmaxp(vf, rd, rn); break;
   3635     case NEON_FMAXNMP_scalar: fmaxnmp(vf, rd, rn); break;
   3636     case NEON_FMINP_scalar:   fminp(vf, rd, rn); break;
   3637     case NEON_FMINNMP_scalar: fminnmp(vf, rd, rn); break;
   3638     default:
   3639       VIXL_UNIMPLEMENTED();
   3640   }
   3641 }
   3642 
   3643 
   3644 void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
   3645   SimVRegister& rd = vreg(instr->Rd());
   3646   SimVRegister& rn = vreg(instr->Rn());
   3647   FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
   3648 
   3649   static const NEONFormatMap map = {
   3650     {22, 21, 20, 19},
   3651     {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S,
   3652      NF_D,     NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D}
   3653   };
   3654   NEONFormatDecoder nfd(instr, &map);
   3655   VectorFormat vf = nfd.GetVectorFormat();
   3656 
   3657   int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
   3658   int immhimmb = instr->ImmNEONImmhImmb();
   3659   int right_shift = (16 << highestSetBit) - immhimmb;
   3660   int left_shift = immhimmb - (8 << highestSetBit);
   3661   switch (instr->Mask(NEONScalarShiftImmediateMask)) {
   3662     case NEON_SHL_scalar:       shl(vf, rd, rn, left_shift); break;
   3663     case NEON_SLI_scalar:       sli(vf, rd, rn, left_shift); break;
   3664     case NEON_SQSHL_imm_scalar: sqshl(vf, rd, rn, left_shift); break;
   3665     case NEON_UQSHL_imm_scalar: uqshl(vf, rd, rn, left_shift); break;
   3666     case NEON_SQSHLU_scalar:    sqshlu(vf, rd, rn, left_shift); break;
   3667     case NEON_SRI_scalar:       sri(vf, rd, rn, right_shift); break;
   3668     case NEON_SSHR_scalar:      sshr(vf, rd, rn, right_shift); break;
   3669     case NEON_USHR_scalar:      ushr(vf, rd, rn, right_shift); break;
   3670     case NEON_SRSHR_scalar:     sshr(vf, rd, rn, right_shift).Round(vf); break;
   3671     case NEON_URSHR_scalar:     ushr(vf, rd, rn, right_shift).Round(vf); break;
   3672     case NEON_SSRA_scalar:      ssra(vf, rd, rn, right_shift); break;
   3673     case NEON_USRA_scalar:      usra(vf, rd, rn, right_shift); break;
   3674     case NEON_SRSRA_scalar:     srsra(vf, rd, rn, right_shift); break;
   3675     case NEON_URSRA_scalar:     ursra(vf, rd, rn, right_shift); break;
   3676     case NEON_UQSHRN_scalar:    uqshrn(vf, rd, rn, right_shift); break;
   3677     case NEON_UQRSHRN_scalar:   uqrshrn(vf, rd, rn, right_shift); break;
   3678     case NEON_SQSHRN_scalar:    sqshrn(vf, rd, rn, right_shift); break;
   3679     case NEON_SQRSHRN_scalar:   sqrshrn(vf, rd, rn, right_shift); break;
   3680     case NEON_SQSHRUN_scalar:   sqshrun(vf, rd, rn, right_shift); break;
   3681     case NEON_SQRSHRUN_scalar:  sqrshrun(vf, rd, rn, right_shift); break;
   3682     case NEON_FCVTZS_imm_scalar: fcvts(vf, rd, rn, FPZero, right_shift); break;
   3683     case NEON_FCVTZU_imm_scalar: fcvtu(vf, rd, rn, FPZero, right_shift); break;
   3684     case NEON_SCVTF_imm_scalar:
   3685       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
   3686       break;
   3687     case NEON_UCVTF_imm_scalar:
   3688       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
   3689       break;
   3690     default:
   3691       VIXL_UNIMPLEMENTED();
   3692   }
   3693 }
   3694 
   3695 
   3696 void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
   3697   SimVRegister& rd = vreg(instr->Rd());
   3698   SimVRegister& rn = vreg(instr->Rn());
   3699   FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
   3700 
   3701   // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
   3702   // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
   3703   static const NEONFormatMap map = {
   3704     {22, 21, 20, 19, 30},
   3705     {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B, NF_4H,    NF_8H, NF_4H,    NF_8H,
   3706      NF_2S,    NF_4S,    NF_2S,    NF_4S,  NF_2S,    NF_4S, NF_2S,    NF_4S,
   3707      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,  NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
   3708      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,  NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}
   3709   };
   3710   NEONFormatDecoder nfd(instr, &map);
   3711   VectorFormat vf = nfd.GetVectorFormat();
   3712 
   3713   // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
   3714   static const NEONFormatMap map_l = {
   3715     {22, 21, 20, 19},
   3716     {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}
   3717   };
   3718   VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
   3719 
   3720   int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
   3721   int immhimmb = instr->ImmNEONImmhImmb();
   3722   int right_shift = (16 << highestSetBit) - immhimmb;
   3723   int left_shift = immhimmb - (8 << highestSetBit);
   3724 
   3725   switch (instr->Mask(NEONShiftImmediateMask)) {
   3726     case NEON_SHL:    shl(vf, rd, rn, left_shift); break;
   3727     case NEON_SLI:    sli(vf, rd, rn, left_shift); break;
   3728     case NEON_SQSHLU: sqshlu(vf, rd, rn, left_shift); break;
   3729     case NEON_SRI:    sri(vf, rd, rn, right_shift); break;
   3730     case NEON_SSHR:   sshr(vf, rd, rn, right_shift); break;
   3731     case NEON_USHR:   ushr(vf, rd, rn, right_shift); break;
   3732     case NEON_SRSHR:  sshr(vf, rd, rn, right_shift).Round(vf); break;
   3733     case NEON_URSHR:  ushr(vf, rd, rn, right_shift).Round(vf); break;
   3734     case NEON_SSRA:   ssra(vf, rd, rn, right_shift); break;
   3735     case NEON_USRA:   usra(vf, rd, rn, right_shift); break;
   3736     case NEON_SRSRA:  srsra(vf, rd, rn, right_shift); break;
   3737     case NEON_URSRA:  ursra(vf, rd, rn, right_shift); break;
   3738     case NEON_SQSHL_imm: sqshl(vf, rd, rn, left_shift); break;
   3739     case NEON_UQSHL_imm: uqshl(vf, rd, rn, left_shift); break;
   3740     case NEON_SCVTF_imm: scvtf(vf, rd, rn, right_shift, fpcr_rounding); break;
   3741     case NEON_UCVTF_imm: ucvtf(vf, rd, rn, right_shift, fpcr_rounding); break;
   3742     case NEON_FCVTZS_imm: fcvts(vf, rd, rn, FPZero, right_shift); break;
   3743     case NEON_FCVTZU_imm: fcvtu(vf, rd, rn, FPZero, right_shift); break;
   3744     case NEON_SSHLL:
   3745       vf = vf_l;
   3746       if (instr->Mask(NEON_Q)) {
   3747         sshll2(vf, rd, rn, left_shift);
   3748       } else {
   3749         sshll(vf, rd, rn, left_shift);
   3750       }
   3751       break;
   3752     case NEON_USHLL:
   3753       vf = vf_l;
   3754       if (instr->Mask(NEON_Q)) {
   3755         ushll2(vf, rd, rn, left_shift);
   3756       } else {
   3757         ushll(vf, rd, rn, left_shift);
   3758       }
   3759       break;
   3760     case NEON_SHRN:
   3761       if (instr->Mask(NEON_Q)) {
   3762         shrn2(vf, rd, rn, right_shift);
   3763       } else {
   3764         shrn(vf, rd, rn, right_shift);
   3765       }
   3766       break;
   3767     case NEON_RSHRN:
   3768       if (instr->Mask(NEON_Q)) {
   3769         rshrn2(vf, rd, rn, right_shift);
   3770       } else {
   3771         rshrn(vf, rd, rn, right_shift);
   3772       }
   3773       break;
   3774     case NEON_UQSHRN:
   3775       if (instr->Mask(NEON_Q)) {
   3776         uqshrn2(vf, rd, rn, right_shift);
   3777       } else {
   3778         uqshrn(vf, rd, rn, right_shift);
   3779       }
   3780       break;
   3781     case NEON_UQRSHRN:
   3782       if (instr->Mask(NEON_Q)) {
   3783         uqrshrn2(vf, rd, rn, right_shift);
   3784       } else {
   3785         uqrshrn(vf, rd, rn, right_shift);
   3786       }
   3787       break;
   3788     case NEON_SQSHRN:
   3789       if (instr->Mask(NEON_Q)) {
   3790         sqshrn2(vf, rd, rn, right_shift);
   3791       } else {
   3792         sqshrn(vf, rd, rn, right_shift);
   3793       }
   3794       break;
   3795     case NEON_SQRSHRN:
   3796       if (instr->Mask(NEON_Q)) {
   3797         sqrshrn2(vf, rd, rn, right_shift);
   3798       } else {
   3799         sqrshrn(vf, rd, rn, right_shift);
   3800       }
   3801       break;
   3802     case NEON_SQSHRUN:
   3803       if (instr->Mask(NEON_Q)) {
   3804         sqshrun2(vf, rd, rn, right_shift);
   3805       } else {
   3806         sqshrun(vf, rd, rn, right_shift);
   3807       }
   3808       break;
   3809     case NEON_SQRSHRUN:
   3810       if (instr->Mask(NEON_Q)) {
   3811         sqrshrun2(vf, rd, rn, right_shift);
   3812       } else {
   3813         sqrshrun(vf, rd, rn, right_shift);
   3814       }
   3815       break;
   3816     default:
   3817       VIXL_UNIMPLEMENTED();
   3818   }
   3819 }
   3820 
   3821 
   3822 void Simulator::VisitNEONTable(const Instruction* instr) {
   3823   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
   3824   VectorFormat vf = nfd.GetVectorFormat();
   3825 
   3826   SimVRegister& rd = vreg(instr->Rd());
   3827   SimVRegister& rn = vreg(instr->Rn());
   3828   SimVRegister& rn2 = vreg((instr->Rn() + 1) % kNumberOfVRegisters);
   3829   SimVRegister& rn3 = vreg((instr->Rn() + 2) % kNumberOfVRegisters);
   3830   SimVRegister& rn4 = vreg((instr->Rn() + 3) % kNumberOfVRegisters);
   3831   SimVRegister& rm = vreg(instr->Rm());
   3832 
   3833   switch (instr->Mask(NEONTableMask)) {
   3834     case NEON_TBL_1v: tbl(vf, rd, rn, rm); break;
   3835     case NEON_TBL_2v: tbl(vf, rd, rn, rn2, rm); break;
   3836     case NEON_TBL_3v: tbl(vf, rd, rn, rn2, rn3, rm); break;
   3837     case NEON_TBL_4v: tbl(vf, rd, rn, rn2, rn3, rn4, rm); break;
   3838     case NEON_TBX_1v: tbx(vf, rd, rn, rm); break;
   3839     case NEON_TBX_2v: tbx(vf, rd, rn, rn2, rm); break;
   3840     case NEON_TBX_3v: tbx(vf, rd, rn, rn2, rn3, rm); break;
   3841     case NEON_TBX_4v: tbx(vf, rd, rn, rn2, rn3, rn4, rm); break;
   3842     default:
   3843       VIXL_UNIMPLEMENTED();
   3844   }
   3845 }
   3846 
   3847 
   3848 void Simulator::VisitNEONPerm(const Instruction* instr) {
   3849   NEONFormatDecoder nfd(instr);
   3850   VectorFormat vf = nfd.GetVectorFormat();
   3851 
   3852   SimVRegister& rd = vreg(instr->Rd());
   3853   SimVRegister& rn = vreg(instr->Rn());
   3854   SimVRegister& rm = vreg(instr->Rm());
   3855 
   3856   switch (instr->Mask(NEONPermMask)) {
   3857     case NEON_TRN1: trn1(vf, rd, rn, rm); break;
   3858     case NEON_TRN2: trn2(vf, rd, rn, rm); break;
   3859     case NEON_UZP1: uzp1(vf, rd, rn, rm); break;
   3860     case NEON_UZP2: uzp2(vf, rd, rn, rm); break;
   3861     case NEON_ZIP1: zip1(vf, rd, rn, rm); break;
   3862     case NEON_ZIP2: zip2(vf, rd, rn, rm); break;
   3863     default:
   3864       VIXL_UNIMPLEMENTED();
   3865   }
   3866 }
   3867 
   3868 
   3869 void Simulator::DoUnreachable(const Instruction* instr) {
   3870   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
   3871               (instr->ImmException() == kUnreachableOpcode));
   3872 
   3873   fprintf(stream_, "Hit UNREACHABLE marker at pc=%p.\n",
   3874           reinterpret_cast<const void*>(instr));
   3875   abort();
   3876 }
   3877 
   3878 
   3879 void Simulator::DoTrace(const Instruction* instr) {
   3880   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
   3881               (instr->ImmException() == kTraceOpcode));
   3882 
   3883   // Read the arguments encoded inline in the instruction stream.
   3884   uint32_t parameters;
   3885   uint32_t command;
   3886 
   3887   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
   3888   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
   3889   memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
   3890 
   3891   switch (command) {
   3892     case TRACE_ENABLE:
   3893       set_trace_parameters(trace_parameters() | parameters);
   3894       break;
   3895     case TRACE_DISABLE:
   3896       set_trace_parameters(trace_parameters() & ~parameters);
   3897       break;
   3898     default:
   3899       VIXL_UNREACHABLE();
   3900   }
   3901 
   3902   set_pc(instr->InstructionAtOffset(kTraceLength));
   3903 }
   3904 
   3905 
   3906 void Simulator::DoLog(const Instruction* instr) {
   3907   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
   3908               (instr->ImmException() == kLogOpcode));
   3909 
   3910   // Read the arguments encoded inline in the instruction stream.
   3911   uint32_t parameters;
   3912 
   3913   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
   3914   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
   3915 
   3916   // We don't support a one-shot LOG_DISASM.
   3917   VIXL_ASSERT((parameters & LOG_DISASM) == 0);
   3918   // Print the requested information.
   3919   if (parameters & LOG_SYSREGS) PrintSystemRegisters();
   3920   if (parameters & LOG_REGS) PrintRegisters();
   3921   if (parameters & LOG_VREGS) PrintVRegisters();
   3922 
   3923   set_pc(instr->InstructionAtOffset(kLogLength));
   3924 }
   3925 
   3926 
   3927 void Simulator::DoPrintf(const Instruction* instr) {
   3928   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
   3929               (instr->ImmException() == kPrintfOpcode));
   3930 
   3931   // Read the arguments encoded inline in the instruction stream.
   3932   uint32_t arg_count;
   3933   uint32_t arg_pattern_list;
   3934   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
   3935   memcpy(&arg_count,
   3936          instr + kPrintfArgCountOffset,
   3937          sizeof(arg_count));
   3938   memcpy(&arg_pattern_list,
   3939          instr + kPrintfArgPatternListOffset,
   3940          sizeof(arg_pattern_list));
   3941 
   3942   VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
   3943   VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);
   3944 
   3945   // We need to call the host printf function with a set of arguments defined by
   3946   // arg_pattern_list. Because we don't know the types and sizes of the
   3947   // arguments, this is very difficult to do in a robust and portable way. To
   3948   // work around the problem, we pick apart the format string, and print one
   3949   // format placeholder at a time.
   3950 
   3951   // Allocate space for the format string. We take a copy, so we can modify it.
   3952   // Leave enough space for one extra character per expected argument (plus the
   3953   // '\0' termination).
   3954   const char * format_base = reg<const char *>(0);
   3955   VIXL_ASSERT(format_base != NULL);
   3956   size_t length = strlen(format_base) + 1;
   3957   char * const format = new char[length + arg_count];
   3958 
   3959   // A list of chunks, each with exactly one format placeholder.
   3960   const char * chunks[kPrintfMaxArgCount];
   3961 
   3962   // Copy the format string and search for format placeholders.
   3963   uint32_t placeholder_count = 0;
   3964   char * format_scratch = format;
   3965   for (size_t i = 0; i < length; i++) {
   3966     if (format_base[i] != '%') {
   3967       *format_scratch++ = format_base[i];
   3968     } else {
   3969       if (format_base[i + 1] == '%') {
   3970         // Ignore explicit "%%" sequences.
   3971         *format_scratch++ = format_base[i];
   3972         i++;
   3973         // Chunks after the first are passed as format strings to printf, so we
   3974         // need to escape '%' characters in those chunks.
   3975         if (placeholder_count > 0) *format_scratch++ = format_base[i];
   3976       } else {
   3977         VIXL_CHECK(placeholder_count < arg_count);
   3978         // Insert '\0' before placeholders, and store their locations.
   3979         *format_scratch++ = '\0';
   3980         chunks[placeholder_count++] = format_scratch;
   3981         *format_scratch++ = format_base[i];
   3982       }
   3983     }
   3984   }
   3985   VIXL_CHECK(placeholder_count == arg_count);
   3986 
   3987   // Finally, call printf with each chunk, passing the appropriate register
   3988   // argument. Normally, printf returns the number of bytes transmitted, so we
   3989   // can emulate a single printf call by adding the result from each chunk. If
   3990   // any call returns a negative (error) value, though, just return that value.
   3991 
   3992   printf("%s", clr_printf);
   3993 
   3994   // Because '\0' is inserted before each placeholder, the first string in
   3995   // 'format' contains no format placeholders and should be printed literally.
   3996   int result = printf("%s", format);
   3997   int pcs_r = 1;      // Start at x1. x0 holds the format string.
   3998   int pcs_f = 0;      // Start at d0.
   3999   if (result >= 0) {
   4000     for (uint32_t i = 0; i < placeholder_count; i++) {
   4001       int part_result = -1;
   4002 
   4003       uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
   4004       arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
   4005       switch (arg_pattern) {
   4006         case kPrintfArgW: part_result = printf(chunks[i], wreg(pcs_r++)); break;
   4007         case kPrintfArgX: part_result = printf(chunks[i], xreg(pcs_r++)); break;
   4008         case kPrintfArgD: part_result = printf(chunks[i], dreg(pcs_f++)); break;
   4009         default: VIXL_UNREACHABLE();
   4010       }
   4011 
   4012       if (part_result < 0) {
   4013         // Handle error values.
   4014         result = part_result;
   4015         break;
   4016       }
   4017 
   4018       result += part_result;
   4019     }
   4020   }
   4021 
   4022   printf("%s", clr_normal);
   4023 
   4024   // Printf returns its result in x0 (just like the C library's printf).
   4025   set_xreg(0, result);
   4026 
   4027   // The printf parameters are inlined in the code, so skip them.
   4028   set_pc(instr->InstructionAtOffset(kPrintfLength));
   4029 
   4030   // Set LR as if we'd just called a native printf function.
   4031   set_lr(pc());
   4032 
   4033   delete[] format;
   4034 }
   4035 
   4036 }  // namespace vixl
   4037 
   4038 #endif  // VIXL_INCLUDE_SIMULATOR
   4039