// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
#define VIXL_AARCH64_ASSEMBLER_AARCH64_H_

#include "../assembler-base-vixl.h"
#include "../code-generation-scopes-vixl.h"
#include "../globals-vixl.h"
#include "../invalset-vixl.h"
#include "../utils-vixl.h"

#include "operands-aarch64.h"

namespace vixl {
namespace aarch64 {

class LabelTestHelper;  // Forward declaration.


class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  bool IsBound() const { return location_ >= 0; }
  bool IsLinked() const { return !links_.empty(); }

  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor> LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, i.e. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

// It is not safe to copy labels, so disable the copy constructor and operator
// by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
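
// A minimal usage sketch (illustrative only, not part of the API; `masm` is
// assumed to be an Assembler, whose bind and branch methods are declared
// further down this file):
//
//   Label loop;
//   masm.bind(&loop);       // Bind the label to the current PC.
//   masm.sub(x0, x0, 1);    // ... loop body ...
//   masm.cbnz(x0, &loop);   // Link a backward branch to the bound label.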


class Assembler;
class LiteralPool;

// A literal is a 32-bit or 64-bit piece of data stored in the instruction
// stream and loaded through a PC-relative load. The same literal can be
// referred to by multiple instructions, but a literal can only reside at one
// place in memory. A literal can be used by a load before or after being
// placed in memory.
//
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//  1) the literal is placed, and the offset (stored as offset + 1) is used to
//     resolve any subsequent load using the literal.
//  2) the literal is not placed, and offset is the offset of the last load
//     using the literal (stored as -offset - 1). If multiple loads refer to
//     this literal then the last load holds the offset of the preceding load
//     and all loads form a chain. Once the literal is placed, all the loads
//     in the chain are resolved and future loads fall back to possibility 1.
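//
// For example (a sketch of the encoding above, not additional API): a literal
// placed at buffer offset 8 stores `offset_ == 9` (offset + 1), while an
// unplaced literal whose most recent use is a load at offset 16 stores
// `offset_ == -17` (-offset - 1). A stored value of 0 still means "neither
// used nor placed".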
class RawLiteral {
 public:
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};


template <typename T>
class Literal : public RawLiteral {
 public:
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  Literal(T high64,
          T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}

  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows
  // its offset from there. This also allows patching the value after the code
  // has been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (GetSize()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
            GetRawValue32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
            GetRawValue64();
        break;
      default:
        VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
        *base_address = GetRawValue128Low64();
        *(base_address + 1) = GetRawValue128High64();
    }
  }
};
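
// A minimal usage sketch (illustrative only; `masm` is assumed to be an
// Assembler, and `ldr(rt, literal)` and `place(literal)` are declared
// further down this file):
//
//   Literal<uint64_t> forty_two(42);
//   masm.ldr(x0, &forty_two);   // Record a use of the literal.
//   masm.place(&forty_two);     // Emit the value into the instruction stream.
//
// Once placed, the value can still be patched, for example with
// `forty_two.UpdateValue(UINT64_C(43), &masm);`.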


// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
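
// For example (a sketch; the Assembler constructors are declared below):
//
//   Assembler pic_masm(4096, PositionIndependentCode);  // PC-relative only.
//   Assembler abs_masm(4096, PositionDependentCode);    // May use absolute
//                                                       // addresses.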


// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
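
// For example (a sketch using `ldr`, declared below): an x-register load
// scales its immediate by 8, so `ldr(x0, MemOperand(x1, 1))` cannot use a
// scaled offset. With the default PreferScaledOffset it falls back to an
// unscaled-offset encoding, whereas passing RequireScaledOffset would make
// the same call assert.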


// Assembler.
class Assembler : public vixl::internal::AssemblerBase {
 public:
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic) {}
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity), pic_(pic) {}
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity), pic_(pic) {}

  // Upon destruction, the code will assert that one of the following is true:
  //  * The Assembler object has not been used.
  //  * Nothing has been emitted since the last Reset() call.
  //  * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}

  // System functions.

  // Start generating code from the beginning of the buffer, discarding any code
  // and data that has already been emitted into the buffer.
  void Reset();

  // Label.
  // Bind a label to the current PC.
  void bind(Label* label);

  // Bind a label to a specified offset from the start of the buffer.
  void BindToOffset(Label* label, ptrdiff_t offset);

  // Place a literal at the current PC.
  void place(RawLiteral* literal);

  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }

  // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }

  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
  VIXL_DEPRECATED("GetInstructionAt",
                  Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
    return GetInstructionAt(instruction_offset);
  }

  ptrdiff_t GetInstructionOffset(Instruction* instruction) {
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset =
        instruction - GetBuffer()->GetStartAddress<Instruction*>();
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
    return offset;
  }
  VIXL_DEPRECATED("GetInstructionOffset",
                  ptrdiff_t InstructionOffset(Instruction* instruction)) {
    return GetInstructionOffset(instruction);
  }

  // Instruction set functions.

  // Branch / Jump instructions.
  // Branch to register.
  void br(const Register& xn);

  // Branch with link to register.
  void blr(const Register& xn);

  // Branch to register with return hint.
  void ret(const Register& xn = lr);

  // Unconditional branch to label.
  void b(Label* label);

  // Conditional branch to label.
  void b(Label* label, Condition cond);

  // Unconditional branch to PC offset.
  void b(int64_t imm26);

  // Conditional branch to PC offset.
  void b(int64_t imm19, Condition cond);

  // Branch with link to label.
  void bl(Label* label);

  // Branch with link to PC offset.
  void bl(int64_t imm26);

  // Compare and branch to label if zero.
  void cbz(const Register& rt, Label* label);

  // Compare and branch to PC offset if zero.
  void cbz(const Register& rt, int64_t imm19);

  // Compare and branch to label if not zero.
  void cbnz(const Register& rt, Label* label);

  // Compare and branch to PC offset if not zero.
  void cbnz(const Register& rt, int64_t imm19);

  // Table lookup from one register.
  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup from two registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup from three registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup from four registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Table lookup extension from one register.
  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup extension from two registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup extension from three registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup extension from four registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Test bit and branch to label if zero.
  void tbz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if zero.
  void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Test bit and branch to label if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Address calculation instructions.
  // Calculate a PC-relative address. Unlike for branches, the offset in adr is
  // unscaled (i.e. the result can be unaligned).

  // Calculate the address of a label.
  void adr(const Register& xd, Label* label);

  // Calculate the address of a PC offset.
  void adr(const Register& xd, int64_t imm21);

  // Calculate the page address of a label.
  void adrp(const Register& xd, Label* label);

  // Calculate the page address of a PC offset.
  void adrp(const Register& xd, int64_t imm21);

  // Data Processing instructions.
  // Add.
  void add(const Register& rd, const Register& rn, const Operand& operand);

  // Add and update status flags.
  void adds(const Register& rd, const Register& rn, const Operand& operand);

  // Compare negative.
  void cmn(const Register& rn, const Operand& operand);

  // Subtract.
  void sub(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract and update status flags.
  void subs(const Register& rd, const Register& rn, const Operand& operand);

  // Compare.
  void cmp(const Register& rn, const Operand& operand);

  // Negate.
  void neg(const Register& rd, const Operand& operand);

  // Negate and update status flags.
  void negs(const Register& rd, const Operand& operand);

  // Add with carry bit.
  void adc(const Register& rd, const Register& rn, const Operand& operand);

  // Add with carry bit and update status flags.
  void adcs(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit.
  void sbc(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit and update status flags.
  void sbcs(const Register& rd, const Register& rn, const Operand& operand);

  // Negate with carry bit.
  void ngc(const Register& rd, const Operand& operand);

  // Negate with carry bit and update status flags.
  void ngcs(const Register& rd, const Operand& operand);

  // Logical instructions.
  // Bitwise and (A & B).
  void and_(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise and (A & B) and update status flags.
  void ands(const Register& rd, const Register& rn, const Operand& operand);

  // Bit test and set flags.
  void tst(const Register& rn, const Operand& operand);

  // Bit clear (A & ~B).
  void bic(const Register& rd, const Register& rn, const Operand& operand);

  // Bit clear (A & ~B) and update status flags.
  void bics(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or (A | B).
  void orr(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or not (A | ~B).
  void orn(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor/xor (A ^ B).
  void eor(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise exclusive nor/xnor (A ^ ~B).
  void eon(const Register& rd, const Register& rn, const Operand& operand);

  // Logical shift left by variable.
  void lslv(const Register& rd, const Register& rn, const Register& rm);

  // Logical shift right by variable.
  void lsrv(const Register& rd, const Register& rn, const Register& rm);

  // Arithmetic shift right by variable.
  void asrv(const Register& rd, const Register& rn, const Register& rm);

  // Rotate right by variable.
  void rorv(const Register& rd, const Register& rn, const Register& rm);

  // Bitfield instructions.
  // Bitfield move.
  void bfm(const Register& rd,
           const Register& rn,
           unsigned immr,
           unsigned imms);

  // Signed bitfield move.
  void sbfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Unsigned bitfield move.
  void ubfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Bfm aliases.
  // Bitfield insert.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd,
        rn,
        (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
        width - 1);
  }

  // Bitfield extract and insert low.
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd, rn, lsb, lsb + width - 1);
  }
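
  // For example (a sketch of the alias expansion, not additional API):
  // `bfi(w0, w1, 8, 4)` inserts bits <3:0> of w1 at bit 8 of w0 and, per the
  // formula above, expands to `bfm(w0, w1, 24, 3)`; similarly,
  // `bfxil(w0, w1, 8, 4)` expands to `bfm(w0, w1, 8, 11)`.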

  // Sbfm aliases.
  // Arithmetic shift right.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Signed bitfield insert with zero at right.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Signed bitfield extract.
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd, rn, lsb, lsb + width - 1);
  }

  // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }

  // Ubfm aliases.
  // Logical shift left.
  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    unsigned reg_size = rd.GetSizeInBits();
    VIXL_ASSERT(shift < reg_size);
    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
  }
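
  // For example, `lsl(x0, x1, 16)` expands to `ubfm(x0, x1, 48, 47)`:
  // immr = (64 - 16) % 64 = 48 and imms = 64 - 16 - 1 = 47.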

  // Logical shift right.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Unsigned bitfield insert with zero at right.
  void ubfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Unsigned bitfield extract.
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd, rn, lsb, lsb + width - 1);
  }

  // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }

  // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }

  // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }

  // Extract.
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);
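
  // For example (a sketch; `cmp` is declared above): after `cmp(x0, 0)`,
  // `cset(x1, eq)` writes 1 to x1 if x0 was zero and 0 otherwise, and
  // `csel(x2, x3, x4, lt)` selects x3 when x0 was negative (signed) and x4
  // otherwise.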

  // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }

  // Conditional comparison.
  // Conditional compare negative.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);

  // Multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& xd, const Register& wn, const Register& wm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    umaddl(xd, wn, wm, xzr);
  }

  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd, const Register& xn, const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& xd, const Register& xn);

  // Reverse bytes.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits.
  void cls(const Register& rd, const Register& rn);

  // Memory instructions.
  // Load integer or FP register.
  void ldr(const CPURegister& rt,
           const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt,
           const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& xt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
  void ldrb(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store byte.
  void strb(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte with sign extension.
  void ldrsb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word.
  void ldrh(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store half-word.
  void strh(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load integer or FP register (with unscaled offset).
  void ldur(const CPURegister& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store integer or FP register (with unscaled offset).
  void stur(const CPURegister& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load word with sign extension.
  void ldursw(const Register& xt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte (with unscaled offset).
  void ldurb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store byte (with unscaled offset).
  void sturb(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte with sign extension (and unscaled offset).
  void ldursb(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word (with unscaled offset).
  void ldurh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store half-word (with unscaled offset).
  void sturh(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word with sign extension (and unscaled offset).
  void ldursh(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);

  // Load integer or FP register pair, non-temporal.
  void ldnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& src);

  // Store integer or FP register pair, non-temporal.
  void stnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& dst);

  // Load integer or FP register from literal pool.
  void ldr(const CPURegister& rt, RawLiteral* literal);

  // Load word with sign extension from literal pool.
  void ldrsw(const Register& xt, RawLiteral* literal);

  // Load integer or FP register from PC + imm19 << 2.
  void ldr(const CPURegister& rt, int64_t imm19);

  // Load word with sign extension from PC + imm19 << 2.
  void ldrsw(const Register& xt, int64_t imm19);

  // Store exclusive byte.
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive half-word.
  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive register.
  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load exclusive byte.
  void ldxrb(const Register& rt, const MemOperand& src);

  // Load exclusive half-word.
  void ldxrh(const Register& rt, const MemOperand& src);

  // Load exclusive register.
  void ldxr(const Register& rt, const MemOperand& src);

  // Store exclusive register pair.
  void stxp(const Register& rs,
            const Register& rt,
            const Register& rt2,
            const MemOperand& dst);

  // Load exclusive register pair.
  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive register.
  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive register.
  void ldaxr(const Register& rt, const MemOperand& src);

  // Store-release exclusive register pair.
  void stlxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
             const MemOperand& dst);

  // Load-acquire exclusive register pair.
  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release byte.
  void stlrb(const Register& rt, const MemOperand& dst);

  // Store-release half-word.
  void stlrh(const Register& rt, const MemOperand& dst);

  // Store-release register.
  void stlr(const Register& rt, const MemOperand& dst);

  // Load-acquire byte.
  void ldarb(const Register& rt, const MemOperand& src);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const MemOperand& src);

  // Load-acquire register.
  void ldar(const Register& rt, const MemOperand& src);

  // Prefetch memory.
  void prfm(PrefetchOperation op,
            const MemOperand& addr,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Prefetch memory (with unscaled offset).
  void prfum(PrefetchOperation op,
             const MemOperand& addr,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Prefetch memory in the literal pool.
  void prfm(PrefetchOperation op, RawLiteral* literal);

  // Prefetch from PC + imm19 << 2.
  void prfm(PrefetchOperation op, int64_t imm19);

  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // so that the result is equal to the 64-bit immediate argument. If an
  // explicit left shift is specified (0, 16, 32 or 48), the immediate must be
  // a 16-bit value.
  //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.
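  //
  // For example (a sketch): the 64-bit constant 0x1234000056780000 can be
  // materialized with two instructions by using explicit shifts:
  //
  //   movz(x0, 0x5678, 16);  // x0 = 0x0000000056780000
  //   movk(x0, 0x1234, 48);  // x0 = 0x1234000056780000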
   1200 
   1201   // Move immediate and keep.
   1202   void movk(const Register& rd, uint64_t imm, int shift = -1) {
   1203     MoveWide(rd, imm, shift, MOVK);
   1204   }
   1205 
   1206   // Move inverted immediate.
   1207   void movn(const Register& rd, uint64_t imm, int shift = -1) {
   1208     MoveWide(rd, imm, shift, MOVN);
   1209   }
   1210 
   1211   // Move immediate.
   1212   void movz(const Register& rd, uint64_t imm, int shift = -1) {
   1213     MoveWide(rd, imm, shift, MOVZ);
   1214   }
   1215 
   1216   // Misc instructions.
   1217   // Monitor debug-mode breakpoint.
   1218   void brk(int code);
   1219 
   1220   // Halting debug-mode breakpoint.
   1221   void hlt(int code);
   1222 
   1223   // Generate exception targeting EL1.
   1224   void svc(int code);
   1225 
   1226   // Move register to register.
   1227   void mov(const Register& rd, const Register& rn);
   1228 
   1229   // Move inverted operand to register.
   1230   void mvn(const Register& rd, const Operand& operand);
   1231 
   1232   // System instructions.
   1233   // Move to register from system register.
   1234   void mrs(const Register& xt, SystemRegister sysreg);
   1235 
   1236   // Move from register to system register.
   1237   void msr(SystemRegister sysreg, const Register& xt);
   1238 
   1239   // System instruction.
   1240   void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
   1241 
   1242   // System instruction with pre-encoded op (op1:crn:crm:op2).
   1243   void sys(int op, const Register& xt = xzr);
   1244 
   1245   // System data cache operation.
   1246   void dc(DataCacheOp op, const Register& rt);
   1247 
   1248   // System instruction cache operation.
   1249   void ic(InstructionCacheOp op, const Register& rt);
   1250 
   1251   // System hint.
   1252   void hint(SystemHint code);
   1253 
   1254   // Clear exclusive monitor.
   1255   void clrex(int imm4 = 0xf);
   1256 
   1257   // Data memory barrier.
   1258   void dmb(BarrierDomain domain, BarrierType type);
   1259 
   1260   // Data synchronization barrier.
   1261   void dsb(BarrierDomain domain, BarrierType type);
   1262 
   1263   // Instruction synchronization barrier.
   1264   void isb();
   1265 
   1266   // Alias for system instructions.
   1267   // No-op.
   1268   void nop() { hint(NOP); }
   1269 
   1270   // FP and NEON instructions.
   1271   // Move double precision immediate to FP register.
   1272   void fmov(const VRegister& vd, double imm);
   1273 
   1274   // Move single precision immediate to FP register.
   1275   void fmov(const VRegister& vd, float imm);
   1276 
   1277   // Move FP register to register.
   1278   void fmov(const Register& rd, const VRegister& fn);
   1279 
   1280   // Move register to FP register.
   1281   void fmov(const VRegister& vd, const Register& rn);
   1282 
   1283   // Move FP register to FP register.
   1284   void fmov(const VRegister& vd, const VRegister& fn);
   1285 
   1286   // Move 64-bit register to top half of 128-bit FP register.
   1287   void fmov(const VRegister& vd, int index, const Register& rn);
   1288 
   1289   // Move top half of 128-bit FP register to 64-bit register.
   1290   void fmov(const Register& rd, const VRegister& vn, int index);
   1291 
   1292   // FP add.
   1293   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1294 
   1295   // FP subtract.
   1296   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1297 
   1298   // FP multiply.
   1299   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1300 
   1301   // FP fused multiply-add.
   1302   void fmadd(const VRegister& vd,
   1303              const VRegister& vn,
   1304              const VRegister& vm,
   1305              const VRegister& va);
   1306 
   1307   // FP fused multiply-subtract.
   1308   void fmsub(const VRegister& vd,
   1309              const VRegister& vn,
   1310              const VRegister& vm,
   1311              const VRegister& va);
   1312 
   1313   // FP fused multiply-add and negate.
   1314   void fnmadd(const VRegister& vd,
   1315               const VRegister& vn,
   1316               const VRegister& vm,
   1317               const VRegister& va);
   1318 
   1319   // FP fused multiply-subtract and negate.
   1320   void fnmsub(const VRegister& vd,
   1321               const VRegister& vn,
   1322               const VRegister& vm,
   1323               const VRegister& va);
   1324 
   1325   // FP multiply-negate scalar.
   1326   void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1327 
   1328   // FP reciprocal exponent scalar.
   1329   void frecpx(const VRegister& vd, const VRegister& vn);
   1330 
   1331   // FP divide.
   1332   void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1333 
   1334   // FP maximum.
   1335   void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1336 
   1337   // FP minimum.
   1338   void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1339 
   1340   // FP maximum number.
   1341   void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1342 
   1343   // FP minimum number.
   1344   void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1345 
   1346   // FP absolute.
   1347   void fabs(const VRegister& vd, const VRegister& vn);
   1348 
   1349   // FP negate.
   1350   void fneg(const VRegister& vd, const VRegister& vn);
   1351 
   1352   // FP square root.
   1353   void fsqrt(const VRegister& vd, const VRegister& vn);
   1354 
   1355   // FP round to integer, nearest with ties to away.
   1356   void frinta(const VRegister& vd, const VRegister& vn);
   1357 
   1358   // FP round to integer, implicit rounding.
   1359   void frinti(const VRegister& vd, const VRegister& vn);
   1360 
   1361   // FP round to integer, toward minus infinity.
   1362   void frintm(const VRegister& vd, const VRegister& vn);
   1363 
   1364   // FP round to integer, nearest with ties to even.
   1365   void frintn(const VRegister& vd, const VRegister& vn);
   1366 
   1367   // FP round to integer, toward plus infinity.
   1368   void frintp(const VRegister& vd, const VRegister& vn);
   1369 
   1370   // FP round to integer, exact, implicit rounding.
   1371   void frintx(const VRegister& vd, const VRegister& vn);
   1372 
   1373   // FP round to integer, towards zero.
   1374   void frintz(const VRegister& vd, const VRegister& vn);
   1375 
  // Emit an FP compare against an immediate (only zero is encodable); `trap`
  // selects the signaling (fcmpe) or quiet (fcmp) form.
  void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);

  // Emit an FP compare of two registers; `trap` selects the signaling form.
  void FPCompareMacro(const VRegister& vn,
                      const VRegister& vm,
                      FPTrapFlags trap);
   1381 
   1382   // FP compare registers.
   1383   void fcmp(const VRegister& vn, const VRegister& vm);
   1384 
   1385   // FP compare immediate.
   1386   void fcmp(const VRegister& vn, double value);
   1387 
  // Emit an FP conditional compare; `trap` selects the signaling (fccmpe)
  // form.
  void FPCCompareMacro(const VRegister& vn,
                       const VRegister& vm,
                       StatusFlags nzcv,
                       Condition cond,
                       FPTrapFlags trap);
   1393 
   1394   // FP conditional compare.
   1395   void fccmp(const VRegister& vn,
   1396              const VRegister& vm,
   1397              StatusFlags nzcv,
   1398              Condition cond);
   1399 
   1400   // FP signaling compare registers.
   1401   void fcmpe(const VRegister& vn, const VRegister& vm);
   1402 
   1403   // FP signaling compare immediate.
   1404   void fcmpe(const VRegister& vn, double value);
   1405 
   1406   // FP conditional signaling compare.
   1407   void fccmpe(const VRegister& vn,
   1408               const VRegister& vm,
   1409               StatusFlags nzcv,
   1410               Condition cond);
   1411 
   1412   // FP conditional select.
   1413   void fcsel(const VRegister& vd,
   1414              const VRegister& vn,
   1415              const VRegister& vm,
   1416              Condition cond);
   1417 
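  // A minimal compare-and-select sketch (`masm` is illustrative):
  //   masm.fcmp(d1, d2);           // Set NZCV from the comparison.
  //   masm.fcsel(d0, d1, d2, gt);  // d0 = (d1 > d2) ? d1 : d2.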
  // Common FP convert functions.
  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
   1421 
   1422   // FP convert between precisions.
   1423   void fcvt(const VRegister& vd, const VRegister& vn);
   1424 
   1425   // FP convert to higher precision.
   1426   void fcvtl(const VRegister& vd, const VRegister& vn);
   1427 
   1428   // FP convert to higher precision (second part).
   1429   void fcvtl2(const VRegister& vd, const VRegister& vn);
   1430 
   1431   // FP convert to lower precision.
   1432   void fcvtn(const VRegister& vd, const VRegister& vn);
   1433 
  // FP convert to lower precision (second part).
   1435   void fcvtn2(const VRegister& vd, const VRegister& vn);
   1436 
   1437   // FP convert to lower precision, rounding to odd.
   1438   void fcvtxn(const VRegister& vd, const VRegister& vn);
   1439 
   1440   // FP convert to lower precision, rounding to odd (second part).
   1441   void fcvtxn2(const VRegister& vd, const VRegister& vn);
   1442 
   1443   // FP convert to signed integer, nearest with ties to away.
   1444   void fcvtas(const Register& rd, const VRegister& vn);
   1445 
   1446   // FP convert to unsigned integer, nearest with ties to away.
   1447   void fcvtau(const Register& rd, const VRegister& vn);
   1448 
   1449   // FP convert to signed integer, nearest with ties to away.
   1450   void fcvtas(const VRegister& vd, const VRegister& vn);
   1451 
   1452   // FP convert to unsigned integer, nearest with ties to away.
   1453   void fcvtau(const VRegister& vd, const VRegister& vn);
   1454 
   1455   // FP convert to signed integer, round towards -infinity.
   1456   void fcvtms(const Register& rd, const VRegister& vn);
   1457 
   1458   // FP convert to unsigned integer, round towards -infinity.
   1459   void fcvtmu(const Register& rd, const VRegister& vn);
   1460 
   1461   // FP convert to signed integer, round towards -infinity.
   1462   void fcvtms(const VRegister& vd, const VRegister& vn);
   1463 
   1464   // FP convert to unsigned integer, round towards -infinity.
   1465   void fcvtmu(const VRegister& vd, const VRegister& vn);
   1466 
   1467   // FP convert to signed integer, nearest with ties to even.
   1468   void fcvtns(const Register& rd, const VRegister& vn);
   1469 
   1470   // FP convert to unsigned integer, nearest with ties to even.
   1471   void fcvtnu(const Register& rd, const VRegister& vn);
   1472 
   1473   // FP convert to signed integer, nearest with ties to even.
  void fcvtns(const VRegister& vd, const VRegister& vn);
   1475 
   1476   // FP convert to unsigned integer, nearest with ties to even.
  void fcvtnu(const VRegister& vd, const VRegister& vn);
   1478 
   1479   // FP convert to signed integer or fixed-point, round towards zero.
   1480   void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
   1481 
   1482   // FP convert to unsigned integer or fixed-point, round towards zero.
   1483   void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
   1484 
   1485   // FP convert to signed integer or fixed-point, round towards zero.
   1486   void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
   1487 
   1488   // FP convert to unsigned integer or fixed-point, round towards zero.
   1489   void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
   1490 
   1491   // FP convert to signed integer, round towards +infinity.
   1492   void fcvtps(const Register& rd, const VRegister& vn);
   1493 
   1494   // FP convert to unsigned integer, round towards +infinity.
   1495   void fcvtpu(const Register& rd, const VRegister& vn);
   1496 
   1497   // FP convert to signed integer, round towards +infinity.
   1498   void fcvtps(const VRegister& vd, const VRegister& vn);
   1499 
   1500   // FP convert to unsigned integer, round towards +infinity.
   1501   void fcvtpu(const VRegister& vd, const VRegister& vn);
   1502 
  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& vd, const Register& rn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& vd, const Register& rn, int fbits = 0);

  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& vd, const VRegister& vn, int fbits = 0);
   1514 
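  // A fixed-point round-trip sketch using 8 fraction bits (`masm` is
  // illustrative):
  //   masm.fcvtzs(x0, d0, 8);  // x0 = d0 * 2^8, rounded towards zero.
  //   masm.scvtf(d1, x0, 8);   // d1 = (double)x0 / 2^8.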
   1515   // Unsigned absolute difference.
   1516   void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1517 
   1518   // Signed absolute difference.
   1519   void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1520 
   1521   // Unsigned absolute difference and accumulate.
   1522   void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1523 
   1524   // Signed absolute difference and accumulate.
   1525   void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1526 
   1527   // Add.
   1528   void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1529 
   1530   // Subtract.
   1531   void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1532 
   1533   // Unsigned halving add.
   1534   void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1535 
   1536   // Signed halving add.
   1537   void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1538 
   1539   // Unsigned rounding halving add.
   1540   void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1541 
   1542   // Signed rounding halving add.
   1543   void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1544 
   1545   // Unsigned halving sub.
   1546   void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1547 
   1548   // Signed halving sub.
   1549   void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1550 
   1551   // Unsigned saturating add.
   1552   void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1553 
   1554   // Signed saturating add.
   1555   void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1556 
   1557   // Unsigned saturating subtract.
   1558   void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1559 
   1560   // Signed saturating subtract.
   1561   void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1562 
   1563   // Add pairwise.
   1564   void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1565 
   1566   // Add pair of elements scalar.
   1567   void addp(const VRegister& vd, const VRegister& vn);
   1568 
   1569   // Multiply-add to accumulator.
   1570   void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1571 
   1572   // Multiply-subtract to accumulator.
   1573   void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1574 
   1575   // Multiply.
   1576   void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1577 
   1578   // Multiply by scalar element.
   1579   void mul(const VRegister& vd,
   1580            const VRegister& vn,
   1581            const VRegister& vm,
   1582            int vm_index);
   1583 
   1584   // Multiply-add by scalar element.
   1585   void mla(const VRegister& vd,
   1586            const VRegister& vn,
   1587            const VRegister& vm,
   1588            int vm_index);
   1589 
   1590   // Multiply-subtract by scalar element.
   1591   void mls(const VRegister& vd,
   1592            const VRegister& vn,
   1593            const VRegister& vm,
   1594            int vm_index);
   1595 
   1596   // Signed long multiply-add by scalar element.
   1597   void smlal(const VRegister& vd,
   1598              const VRegister& vn,
   1599              const VRegister& vm,
   1600              int vm_index);
   1601 
   1602   // Signed long multiply-add by scalar element (second part).
   1603   void smlal2(const VRegister& vd,
   1604               const VRegister& vn,
   1605               const VRegister& vm,
   1606               int vm_index);
   1607 
   1608   // Unsigned long multiply-add by scalar element.
   1609   void umlal(const VRegister& vd,
   1610              const VRegister& vn,
   1611              const VRegister& vm,
   1612              int vm_index);
   1613 
   1614   // Unsigned long multiply-add by scalar element (second part).
   1615   void umlal2(const VRegister& vd,
   1616               const VRegister& vn,
   1617               const VRegister& vm,
   1618               int vm_index);
   1619 
   1620   // Signed long multiply-sub by scalar element.
   1621   void smlsl(const VRegister& vd,
   1622              const VRegister& vn,
   1623              const VRegister& vm,
   1624              int vm_index);
   1625 
   1626   // Signed long multiply-sub by scalar element (second part).
   1627   void smlsl2(const VRegister& vd,
   1628               const VRegister& vn,
   1629               const VRegister& vm,
   1630               int vm_index);
   1631 
   1632   // Unsigned long multiply-sub by scalar element.
   1633   void umlsl(const VRegister& vd,
   1634              const VRegister& vn,
   1635              const VRegister& vm,
   1636              int vm_index);
   1637 
   1638   // Unsigned long multiply-sub by scalar element (second part).
   1639   void umlsl2(const VRegister& vd,
   1640               const VRegister& vn,
   1641               const VRegister& vm,
   1642               int vm_index);
   1643 
   1644   // Signed long multiply by scalar element.
   1645   void smull(const VRegister& vd,
   1646              const VRegister& vn,
   1647              const VRegister& vm,
   1648              int vm_index);
   1649 
   1650   // Signed long multiply by scalar element (second part).
   1651   void smull2(const VRegister& vd,
   1652               const VRegister& vn,
   1653               const VRegister& vm,
   1654               int vm_index);
   1655 
   1656   // Unsigned long multiply by scalar element.
   1657   void umull(const VRegister& vd,
   1658              const VRegister& vn,
   1659              const VRegister& vm,
   1660              int vm_index);
   1661 
   1662   // Unsigned long multiply by scalar element (second part).
   1663   void umull2(const VRegister& vd,
   1664               const VRegister& vn,
   1665               const VRegister& vm,
   1666               int vm_index);
   1667 
  // Signed saturating doubling long multiply by element.
   1669   void sqdmull(const VRegister& vd,
   1670                const VRegister& vn,
   1671                const VRegister& vm,
   1672                int vm_index);
   1673 
  // Signed saturating doubling long multiply by element (second part).
   1675   void sqdmull2(const VRegister& vd,
   1676                 const VRegister& vn,
   1677                 const VRegister& vm,
   1678                 int vm_index);
   1679 
   1680   // Signed saturating doubling long multiply-add by element.
   1681   void sqdmlal(const VRegister& vd,
   1682                const VRegister& vn,
   1683                const VRegister& vm,
   1684                int vm_index);
   1685 
   1686   // Signed saturating doubling long multiply-add by element (second part).
   1687   void sqdmlal2(const VRegister& vd,
   1688                 const VRegister& vn,
   1689                 const VRegister& vm,
   1690                 int vm_index);
   1691 
   1692   // Signed saturating doubling long multiply-sub by element.
   1693   void sqdmlsl(const VRegister& vd,
   1694                const VRegister& vn,
   1695                const VRegister& vm,
   1696                int vm_index);
   1697 
   1698   // Signed saturating doubling long multiply-sub by element (second part).
   1699   void sqdmlsl2(const VRegister& vd,
   1700                 const VRegister& vn,
   1701                 const VRegister& vm,
   1702                 int vm_index);
   1703 
   1704   // Compare equal.
   1705   void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1706 
   1707   // Compare signed greater than or equal.
   1708   void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1709 
   1710   // Compare signed greater than.
   1711   void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1712 
   1713   // Compare unsigned higher.
   1714   void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1715 
   1716   // Compare unsigned higher or same.
   1717   void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1718 
   1719   // Compare bitwise test bits nonzero.
   1720   void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1721 
  // Compare equal to zero.
   1723   void cmeq(const VRegister& vd, const VRegister& vn, int value);
   1724 
   1725   // Compare signed greater than or equal to zero.
   1726   void cmge(const VRegister& vd, const VRegister& vn, int value);
   1727 
   1728   // Compare signed greater than zero.
   1729   void cmgt(const VRegister& vd, const VRegister& vn, int value);
   1730 
   1731   // Compare signed less than or equal to zero.
   1732   void cmle(const VRegister& vd, const VRegister& vn, int value);
   1733 
   1734   // Compare signed less than zero.
   1735   void cmlt(const VRegister& vd, const VRegister& vn, int value);
   1736 
   1737   // Signed shift left by register.
   1738   void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1739 
   1740   // Unsigned shift left by register.
   1741   void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1742 
   1743   // Signed saturating shift left by register.
   1744   void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1745 
   1746   // Unsigned saturating shift left by register.
   1747   void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1748 
   1749   // Signed rounding shift left by register.
   1750   void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1751 
   1752   // Unsigned rounding shift left by register.
   1753   void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1754 
   1755   // Signed saturating rounding shift left by register.
   1756   void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1757 
   1758   // Unsigned saturating rounding shift left by register.
   1759   void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1760 
   1761   // Bitwise and.
   1762   void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1763 
   1764   // Bitwise or.
   1765   void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1766 
   1767   // Bitwise or immediate.
   1768   void orr(const VRegister& vd, const int imm8, const int left_shift = 0);
   1769 
   1770   // Move register to register.
   1771   void mov(const VRegister& vd, const VRegister& vn);
   1772 
   1773   // Bitwise orn.
   1774   void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1775 
   1776   // Bitwise eor.
   1777   void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1778 
   1779   // Bit clear immediate.
   1780   void bic(const VRegister& vd, const int imm8, const int left_shift = 0);
   1781 
   1782   // Bit clear.
   1783   void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1784 
   1785   // Bitwise insert if false.
   1786   void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1787 
   1788   // Bitwise insert if true.
   1789   void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1790 
   1791   // Bitwise select.
   1792   void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1793 
   1794   // Polynomial multiply.
   1795   void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1796 
   1797   // Vector move immediate.
   1798   void movi(const VRegister& vd,
   1799             const uint64_t imm,
   1800             Shift shift = LSL,
   1801             const int shift_amount = 0);
   1802 
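  // Typical immediate splats (`masm` is illustrative):
  //   masm.movi(v0.V16B(), 0x55);         // Every byte lane = 0x55.
  //   masm.movi(v1.V4S(), 0xff, LSL, 8);  // Every word lane = 0x0000ff00.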
   1803   // Bitwise not.
   1804   void mvn(const VRegister& vd, const VRegister& vn);
   1805 
   1806   // Vector move inverted immediate.
   1807   void mvni(const VRegister& vd,
   1808             const int imm8,
   1809             Shift shift = LSL,
   1810             const int shift_amount = 0);
   1811 
   1812   // Signed saturating accumulate of unsigned value.
   1813   void suqadd(const VRegister& vd, const VRegister& vn);
   1814 
   1815   // Unsigned saturating accumulate of signed value.
   1816   void usqadd(const VRegister& vd, const VRegister& vn);
   1817 
   1818   // Absolute value.
   1819   void abs(const VRegister& vd, const VRegister& vn);
   1820 
   1821   // Signed saturating absolute value.
   1822   void sqabs(const VRegister& vd, const VRegister& vn);
   1823 
   1824   // Negate.
   1825   void neg(const VRegister& vd, const VRegister& vn);
   1826 
   1827   // Signed saturating negate.
   1828   void sqneg(const VRegister& vd, const VRegister& vn);
   1829 
   1830   // Bitwise not.
   1831   void not_(const VRegister& vd, const VRegister& vn);
   1832 
   1833   // Extract narrow.
   1834   void xtn(const VRegister& vd, const VRegister& vn);
   1835 
   1836   // Extract narrow (second part).
   1837   void xtn2(const VRegister& vd, const VRegister& vn);
   1838 
   1839   // Signed saturating extract narrow.
   1840   void sqxtn(const VRegister& vd, const VRegister& vn);
   1841 
   1842   // Signed saturating extract narrow (second part).
   1843   void sqxtn2(const VRegister& vd, const VRegister& vn);
   1844 
   1845   // Unsigned saturating extract narrow.
   1846   void uqxtn(const VRegister& vd, const VRegister& vn);
   1847 
   1848   // Unsigned saturating extract narrow (second part).
   1849   void uqxtn2(const VRegister& vd, const VRegister& vn);
   1850 
   1851   // Signed saturating extract unsigned narrow.
   1852   void sqxtun(const VRegister& vd, const VRegister& vn);
   1853 
   1854   // Signed saturating extract unsigned narrow (second part).
   1855   void sqxtun2(const VRegister& vd, const VRegister& vn);
   1856 
   1857   // Extract vector from pair of vectors.
   1858   void ext(const VRegister& vd,
   1859            const VRegister& vn,
   1860            const VRegister& vm,
   1861            int index);
   1862 
   1863   // Duplicate vector element to vector or scalar.
   1864   void dup(const VRegister& vd, const VRegister& vn, int vn_index);
   1865 
   1866   // Move vector element to scalar.
   1867   void mov(const VRegister& vd, const VRegister& vn, int vn_index);
   1868 
   1869   // Duplicate general-purpose register to vector.
   1870   void dup(const VRegister& vd, const Register& rn);
   1871 
   1872   // Insert vector element from another vector element.
   1873   void ins(const VRegister& vd,
   1874            int vd_index,
   1875            const VRegister& vn,
   1876            int vn_index);
   1877 
   1878   // Move vector element to another vector element.
   1879   void mov(const VRegister& vd,
   1880            int vd_index,
   1881            const VRegister& vn,
   1882            int vn_index);
   1883 
   1884   // Insert vector element from general-purpose register.
   1885   void ins(const VRegister& vd, int vd_index, const Register& rn);
   1886 
   1887   // Move general-purpose register to a vector element.
   1888   void mov(const VRegister& vd, int vd_index, const Register& rn);
   1889 
   1890   // Unsigned move vector element to general-purpose register.
   1891   void umov(const Register& rd, const VRegister& vn, int vn_index);
   1892 
   1893   // Move vector element to general-purpose register.
   1894   void mov(const Register& rd, const VRegister& vn, int vn_index);
   1895 
   1896   // Signed move vector element to general-purpose register.
   1897   void smov(const Register& rd, const VRegister& vn, int vn_index);
   1898 
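  // A lane-access sketch (`masm` is illustrative):
  //   masm.dup(v0.V4S(), w1);      // Broadcast w1 into all four word lanes.
  //   masm.umov(w2, v0.V4S(), 3);  // Read lane 3 back into w2.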
   1899   // One-element structure load to one register.
   1900   void ld1(const VRegister& vt, const MemOperand& src);
   1901 
   1902   // One-element structure load to two registers.
   1903   void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
   1904 
   1905   // One-element structure load to three registers.
   1906   void ld1(const VRegister& vt,
   1907            const VRegister& vt2,
   1908            const VRegister& vt3,
   1909            const MemOperand& src);
   1910 
   1911   // One-element structure load to four registers.
   1912   void ld1(const VRegister& vt,
   1913            const VRegister& vt2,
   1914            const VRegister& vt3,
   1915            const VRegister& vt4,
   1916            const MemOperand& src);
   1917 
   1918   // One-element single structure load to one lane.
   1919   void ld1(const VRegister& vt, int lane, const MemOperand& src);
   1920 
   1921   // One-element single structure load to all lanes.
   1922   void ld1r(const VRegister& vt, const MemOperand& src);
   1923 
   1924   // Two-element structure load.
   1925   void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
   1926 
   1927   // Two-element single structure load to one lane.
   1928   void ld2(const VRegister& vt,
   1929            const VRegister& vt2,
   1930            int lane,
   1931            const MemOperand& src);
   1932 
   1933   // Two-element single structure load to all lanes.
   1934   void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
   1935 
   1936   // Three-element structure load.
   1937   void ld3(const VRegister& vt,
   1938            const VRegister& vt2,
   1939            const VRegister& vt3,
   1940            const MemOperand& src);
   1941 
   1942   // Three-element single structure load to one lane.
   1943   void ld3(const VRegister& vt,
   1944            const VRegister& vt2,
   1945            const VRegister& vt3,
   1946            int lane,
   1947            const MemOperand& src);
   1948 
   1949   // Three-element single structure load to all lanes.
   1950   void ld3r(const VRegister& vt,
   1951             const VRegister& vt2,
   1952             const VRegister& vt3,
   1953             const MemOperand& src);
   1954 
   1955   // Four-element structure load.
   1956   void ld4(const VRegister& vt,
   1957            const VRegister& vt2,
   1958            const VRegister& vt3,
   1959            const VRegister& vt4,
   1960            const MemOperand& src);
   1961 
   1962   // Four-element single structure load to one lane.
   1963   void ld4(const VRegister& vt,
   1964            const VRegister& vt2,
   1965            const VRegister& vt3,
   1966            const VRegister& vt4,
   1967            int lane,
   1968            const MemOperand& src);
   1969 
   1970   // Four-element single structure load to all lanes.
   1971   void ld4r(const VRegister& vt,
   1972             const VRegister& vt2,
   1973             const VRegister& vt3,
   1974             const VRegister& vt4,
   1975             const MemOperand& src);
   1976 
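  // A structure-load sketch (`masm` is illustrative):
  //   masm.ld1(v0.V16B(), MemOperand(x0));     // Contiguous 16-byte load.
  //   masm.ld2(v1.V4S(), v2.V4S(),
  //            MemOperand(x1, 32, PostIndex));  // De-interleave, advance x1.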
   1977   // Count leading sign bits.
   1978   void cls(const VRegister& vd, const VRegister& vn);
   1979 
   1980   // Count leading zero bits (vector).
   1981   void clz(const VRegister& vd, const VRegister& vn);
   1982 
   1983   // Population count per byte.
   1984   void cnt(const VRegister& vd, const VRegister& vn);
   1985 
   1986   // Reverse bit order.
   1987   void rbit(const VRegister& vd, const VRegister& vn);
   1988 
   1989   // Reverse elements in 16-bit halfwords.
   1990   void rev16(const VRegister& vd, const VRegister& vn);
   1991 
   1992   // Reverse elements in 32-bit words.
   1993   void rev32(const VRegister& vd, const VRegister& vn);
   1994 
   1995   // Reverse elements in 64-bit doublewords.
   1996   void rev64(const VRegister& vd, const VRegister& vn);
   1997 
   1998   // Unsigned reciprocal square root estimate.
   1999   void ursqrte(const VRegister& vd, const VRegister& vn);
   2000 
   2001   // Unsigned reciprocal estimate.
   2002   void urecpe(const VRegister& vd, const VRegister& vn);
   2003 
   2004   // Signed pairwise long add.
   2005   void saddlp(const VRegister& vd, const VRegister& vn);
   2006 
   2007   // Unsigned pairwise long add.
   2008   void uaddlp(const VRegister& vd, const VRegister& vn);
   2009 
   2010   // Signed pairwise long add and accumulate.
   2011   void sadalp(const VRegister& vd, const VRegister& vn);
   2012 
   2013   // Unsigned pairwise long add and accumulate.
   2014   void uadalp(const VRegister& vd, const VRegister& vn);
   2015 
   2016   // Shift left by immediate.
   2017   void shl(const VRegister& vd, const VRegister& vn, int shift);
   2018 
   2019   // Signed saturating shift left by immediate.
   2020   void sqshl(const VRegister& vd, const VRegister& vn, int shift);
   2021 
   2022   // Signed saturating shift left unsigned by immediate.
   2023   void sqshlu(const VRegister& vd, const VRegister& vn, int shift);
   2024 
   2025   // Unsigned saturating shift left by immediate.
   2026   void uqshl(const VRegister& vd, const VRegister& vn, int shift);
   2027 
   2028   // Signed shift left long by immediate.
   2029   void sshll(const VRegister& vd, const VRegister& vn, int shift);
   2030 
   2031   // Signed shift left long by immediate (second part).
   2032   void sshll2(const VRegister& vd, const VRegister& vn, int shift);
   2033 
   2034   // Signed extend long.
   2035   void sxtl(const VRegister& vd, const VRegister& vn);
   2036 
   2037   // Signed extend long (second part).
   2038   void sxtl2(const VRegister& vd, const VRegister& vn);
   2039 
   2040   // Unsigned shift left long by immediate.
   2041   void ushll(const VRegister& vd, const VRegister& vn, int shift);
   2042 
   2043   // Unsigned shift left long by immediate (second part).
   2044   void ushll2(const VRegister& vd, const VRegister& vn, int shift);
   2045 
   2046   // Shift left long by element size.
   2047   void shll(const VRegister& vd, const VRegister& vn, int shift);
   2048 
   2049   // Shift left long by element size (second part).
   2050   void shll2(const VRegister& vd, const VRegister& vn, int shift);
   2051 
   2052   // Unsigned extend long.
   2053   void uxtl(const VRegister& vd, const VRegister& vn);
   2054 
   2055   // Unsigned extend long (second part).
   2056   void uxtl2(const VRegister& vd, const VRegister& vn);
   2057 
   2058   // Shift left by immediate and insert.
   2059   void sli(const VRegister& vd, const VRegister& vn, int shift);
   2060 
   2061   // Shift right by immediate and insert.
   2062   void sri(const VRegister& vd, const VRegister& vn, int shift);
   2063 
   2064   // Signed maximum.
   2065   void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2066 
   2067   // Signed pairwise maximum.
   2068   void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2069 
   2070   // Add across vector.
   2071   void addv(const VRegister& vd, const VRegister& vn);
   2072 
   2073   // Signed add long across vector.
   2074   void saddlv(const VRegister& vd, const VRegister& vn);
   2075 
   2076   // Unsigned add long across vector.
   2077   void uaddlv(const VRegister& vd, const VRegister& vn);
   2078 
   2079   // FP maximum number across vector.
   2080   void fmaxnmv(const VRegister& vd, const VRegister& vn);
   2081 
   2082   // FP maximum across vector.
   2083   void fmaxv(const VRegister& vd, const VRegister& vn);
   2084 
   2085   // FP minimum number across vector.
   2086   void fminnmv(const VRegister& vd, const VRegister& vn);
   2087 
   2088   // FP minimum across vector.
   2089   void fminv(const VRegister& vd, const VRegister& vn);
   2090 
   2091   // Signed maximum across vector.
   2092   void smaxv(const VRegister& vd, const VRegister& vn);
   2093 
   2094   // Signed minimum.
   2095   void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2096 
  // Signed pairwise minimum.
   2098   void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2099 
   2100   // Signed minimum across vector.
   2101   void sminv(const VRegister& vd, const VRegister& vn);
   2102 
  // One-element structure store from one register.
  void st1(const VRegister& vt, const MemOperand& dst);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& dst);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& dst);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& dst);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& dst);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& dst);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& dst);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& dst);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& dst);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& dst);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& dst);
   2161 
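  // The stores mirror the loads above; for example (`masm` is illustrative):
  //   masm.st4(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(),
  //            MemOperand(x2));  // Interleave four registers into memory.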
   2162   // Unsigned add long.
   2163   void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2164 
   2165   // Unsigned add long (second part).
   2166   void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2167 
   2168   // Unsigned add wide.
   2169   void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2170 
   2171   // Unsigned add wide (second part).
   2172   void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2173 
   2174   // Signed add long.
   2175   void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2176 
   2177   // Signed add long (second part).
   2178   void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2179 
   2180   // Signed add wide.
   2181   void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2182 
   2183   // Signed add wide (second part).
   2184   void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2185 
   2186   // Unsigned subtract long.
   2187   void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2188 
   2189   // Unsigned subtract long (second part).
   2190   void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2191 
   2192   // Unsigned subtract wide.
   2193   void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2194 
   2195   // Unsigned subtract wide (second part).
   2196   void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2197 
   2198   // Signed subtract long.
   2199   void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2200 
   2201   // Signed subtract long (second part).
   2202   void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2203 
  // Signed subtract wide.
   2205   void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2206 
  // Signed subtract wide (second part).
   2208   void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2209 
   2210   // Unsigned maximum.
   2211   void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2212 
   2213   // Unsigned pairwise maximum.
   2214   void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2215 
   2216   // Unsigned maximum across vector.
   2217   void umaxv(const VRegister& vd, const VRegister& vn);
   2218 
   2219   // Unsigned minimum.
   2220   void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2221 
   2222   // Unsigned pairwise minimum.
   2223   void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2224 
   2225   // Unsigned minimum across vector.
   2226   void uminv(const VRegister& vd, const VRegister& vn);
   2227 
   2228   // Transpose vectors (primary).
   2229   void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2230 
   2231   // Transpose vectors (secondary).
   2232   void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2233 
   2234   // Unzip vectors (primary).
   2235   void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2236 
   2237   // Unzip vectors (secondary).
   2238   void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2239 
   2240   // Zip vectors (primary).
   2241   void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2242 
   2243   // Zip vectors (secondary).
   2244   void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2245 
   2246   // Signed shift right by immediate.
   2247   void sshr(const VRegister& vd, const VRegister& vn, int shift);
   2248 
   2249   // Unsigned shift right by immediate.
   2250   void ushr(const VRegister& vd, const VRegister& vn, int shift);
   2251 
   2252   // Signed rounding shift right by immediate.
   2253   void srshr(const VRegister& vd, const VRegister& vn, int shift);
   2254 
   2255   // Unsigned rounding shift right by immediate.
   2256   void urshr(const VRegister& vd, const VRegister& vn, int shift);
   2257 
   2258   // Signed shift right by immediate and accumulate.
   2259   void ssra(const VRegister& vd, const VRegister& vn, int shift);
   2260 
   2261   // Unsigned shift right by immediate and accumulate.
   2262   void usra(const VRegister& vd, const VRegister& vn, int shift);
   2263 
   2264   // Signed rounding shift right by immediate and accumulate.
   2265   void srsra(const VRegister& vd, const VRegister& vn, int shift);
   2266 
   2267   // Unsigned rounding shift right by immediate and accumulate.
   2268   void ursra(const VRegister& vd, const VRegister& vn, int shift);
   2269 
   2270   // Shift right narrow by immediate.
   2271   void shrn(const VRegister& vd, const VRegister& vn, int shift);
   2272 
   2273   // Shift right narrow by immediate (second part).
   2274   void shrn2(const VRegister& vd, const VRegister& vn, int shift);
   2275 
   2276   // Rounding shift right narrow by immediate.
   2277   void rshrn(const VRegister& vd, const VRegister& vn, int shift);
   2278 
   2279   // Rounding shift right narrow by immediate (second part).
   2280   void rshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2281 
   2282   // Unsigned saturating shift right narrow by immediate.
   2283   void uqshrn(const VRegister& vd, const VRegister& vn, int shift);
   2284 
   2285   // Unsigned saturating shift right narrow by immediate (second part).
   2286   void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2287 
   2288   // Unsigned saturating rounding shift right narrow by immediate.
   2289   void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);
   2290 
   2291   // Unsigned saturating rounding shift right narrow by immediate (second part).
   2292   void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2293 
   2294   // Signed saturating shift right narrow by immediate.
   2295   void sqshrn(const VRegister& vd, const VRegister& vn, int shift);
   2296 
   2297   // Signed saturating shift right narrow by immediate (second part).
   2298   void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2299 
  // Signed saturating rounding shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounding shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounding shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounding shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
   2317 
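  // A narrowing-shift sketch, halving element width while scaling down
  // (`masm` is illustrative):
  //   masm.sqrshrn(v0.V4H(), v1.V4S(), 8);  // 32->16, rounding, saturating.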
   2318   // FP reciprocal step.
   2319   void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2320 
   2321   // FP reciprocal estimate.
   2322   void frecpe(const VRegister& vd, const VRegister& vn);
   2323 
   2324   // FP reciprocal square root estimate.
   2325   void frsqrte(const VRegister& vd, const VRegister& vn);
   2326 
   2327   // FP reciprocal square root step.
   2328   void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2329 
   2330   // Signed absolute difference and accumulate long.
   2331   void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2332 
   2333   // Signed absolute difference and accumulate long (second part).
   2334   void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2335 
   2336   // Unsigned absolute difference and accumulate long.
   2337   void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2338 
   2339   // Unsigned absolute difference and accumulate long (second part).
   2340   void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2341 
   2342   // Signed absolute difference long.
   2343   void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2344 
   2345   // Signed absolute difference long (second part).
   2346   void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2347 
   2348   // Unsigned absolute difference long.
   2349   void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2350 
   2351   // Unsigned absolute difference long (second part).
   2352   void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2353 
   2354   // Polynomial multiply long.
   2355   void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2356 
   2357   // Polynomial multiply long (second part).
   2358   void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2359 
   2360   // Signed long multiply-add.
   2361   void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2362 
   2363   // Signed long multiply-add (second part).
   2364   void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2365 
   2366   // Unsigned long multiply-add.
   2367   void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2368 
   2369   // Unsigned long multiply-add (second part).
   2370   void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2371 
   2372   // Signed long multiply-sub.
   2373   void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2374 
   2375   // Signed long multiply-sub (second part).
   2376   void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2377 
   2378   // Unsigned long multiply-sub.
   2379   void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2380 
   2381   // Unsigned long multiply-sub (second part).
   2382   void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2383 
   2384   // Signed long multiply.
   2385   void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2386 
   2387   // Signed long multiply (second part).
   2388   void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2389 
   2390   // Signed saturating doubling long multiply-add.
   2391   void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2392 
   2393   // Signed saturating doubling long multiply-add (second part).
   2394   void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2395 
   2396   // Signed saturating doubling long multiply-subtract.
   2397   void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2398 
   2399   // Signed saturating doubling long multiply-subtract (second part).
   2400   void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2401 
   2402   // Signed saturating doubling long multiply.
   2403   void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2404 
   2405   // Signed saturating doubling long multiply (second part).
   2406   void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2407 
   2408   // Signed saturating doubling multiply returning high half.
   2409   void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2410 
   2411   // Signed saturating rounding doubling multiply returning high half.
   2412   void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2413 
   2414   // Signed saturating doubling multiply element returning high half.
   2415   void sqdmulh(const VRegister& vd,
   2416                const VRegister& vn,
   2417                const VRegister& vm,
   2418                int vm_index);
   2419 
   2420   // Signed saturating rounding doubling multiply element returning high half.
   2421   void sqrdmulh(const VRegister& vd,
   2422                 const VRegister& vn,
   2423                 const VRegister& vm,
   2424                 int vm_index);
   2425 
  // Unsigned long multiply.
   2427   void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2428 
   2429   // Unsigned long multiply (second part).
   2430   void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2431 
   2432   // Add narrow returning high half.
   2433   void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2434 
   2435   // Add narrow returning high half (second part).
   2436   void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2437 
   2438   // Rounding add narrow returning high half.
   2439   void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2440 
   2441   // Rounding add narrow returning high half (second part).
   2442   void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2443 
   2444   // Subtract narrow returning high half.
   2445   void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2446 
   2447   // Subtract narrow returning high half (second part).
   2448   void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2449 
   2450   // Rounding subtract narrow returning high half.
   2451   void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2452 
   2453   // Rounding subtract narrow returning high half (second part).
   2454   void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2455 
   2456   // FP vector multiply accumulate.
   2457   void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2458 
   2459   // FP vector multiply subtract.
   2460   void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2461 
   2462   // FP vector multiply extended.
   2463   void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2464 
   2465   // FP absolute greater than or equal.
   2466   void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2467 
   2468   // FP absolute greater than.
   2469   void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2470 
   2471   // FP multiply by element.
   2472   void fmul(const VRegister& vd,
   2473             const VRegister& vn,
   2474             const VRegister& vm,
   2475             int vm_index);
   2476 
   2477   // FP fused multiply-add to accumulator by element.
   2478   void fmla(const VRegister& vd,
   2479             const VRegister& vn,
   2480             const VRegister& vm,
   2481             int vm_index);
   2482 
   2483   // FP fused multiply-sub from accumulator by element.
   2484   void fmls(const VRegister& vd,
   2485             const VRegister& vn,
   2486             const VRegister& vm,
   2487             int vm_index);
   2488 
   2489   // FP multiply extended by element.
   2490   void fmulx(const VRegister& vd,
   2491              const VRegister& vn,
   2492              const VRegister& vm,
   2493              int vm_index);
   2494 
   2495   // FP compare equal.
   2496   void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2497 
   2498   // FP greater than.
   2499   void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2500 
   2501   // FP greater than or equal.
   2502   void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2503 
   2504   // FP compare equal to zero.
   2505   void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
   2506 
   2507   // FP greater than zero.
   2508   void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
   2509 
   2510   // FP greater than or equal to zero.
   2511   void fcmge(const VRegister& vd, const VRegister& vn, double imm);
   2512 
   2513   // FP less than or equal to zero.
   2514   void fcmle(const VRegister& vd, const VRegister& vn, double imm);
   2515 
  // FP less than zero.
   2517   void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
   2518 
   2519   // FP absolute difference.
   2520   void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2521 
   2522   // FP pairwise add vector.
   2523   void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2524 
   2525   // FP pairwise add scalar.
   2526   void faddp(const VRegister& vd, const VRegister& vn);
   2527 
   2528   // FP pairwise maximum vector.
   2529   void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2530 
   2531   // FP pairwise maximum scalar.
   2532   void fmaxp(const VRegister& vd, const VRegister& vn);
   2533 
   2534   // FP pairwise minimum vector.
   2535   void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2536 
   2537   // FP pairwise minimum scalar.
   2538   void fminp(const VRegister& vd, const VRegister& vn);
   2539 
   2540   // FP pairwise maximum number vector.
   2541   void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2542 
   2543   // FP pairwise maximum number scalar.
   2544   void fmaxnmp(const VRegister& vd, const VRegister& vn);
   2545 
   2546   // FP pairwise minimum number vector.
   2547   void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2548 
   2549   // FP pairwise minimum number scalar.
   2550   void fminnmp(const VRegister& vd, const VRegister& vn);
   2551 
  // Emit generic instructions.

  // Emit raw instructions into the instruction stream.
   2554   void dci(Instr raw_inst) { Emit(raw_inst); }
   2555 
   2556   // Emit 32 bits of data into the instruction stream.
   2557   void dc32(uint32_t data) { dc(data); }
   2558 
   2559   // Emit 64 bits of data into the instruction stream.
   2560   void dc64(uint64_t data) { dc(data); }
   2561 
   2562   // Emit data in the instruction stream.
   2563   template <typename T>
   2564   void dc(T data) {
   2565     VIXL_ASSERT(AllowAssembler());
   2566     GetBuffer()->Emit<T>(data);
   2567   }
   2568 
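  // A raw-emission sketch (`masm` is illustrative):
  //   masm.dci(0xd503201f);                     // The A64 NOP encoding.
  //   masm.dc32(0xdeadbeef);                    // 32 bits of data.
  //   masm.dc64(UINT64_C(0x0123456789abcdef));  // 64 bits of data.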
   2569   // Copy a string into the instruction stream, including the terminating NULL
   2570   // character. The instruction pointer is then aligned correctly for
   2571   // subsequent instructions.
   2572   void EmitString(const char* string) {
   2573     VIXL_ASSERT(string != NULL);
   2574     VIXL_ASSERT(AllowAssembler());
   2575 
   2576     GetBuffer()->EmitString(string);
   2577     GetBuffer()->Align();
   2578   }
   2579 
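  // For example, masm.EmitString("id: 42") emits seven bytes (six characters
  // plus the terminating NULL) and then pads up to the next 4-byte boundary.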
   2580   // Code generation helpers.
   2581 
   2582   // Register encoding.
   2583   static Instr Rd(CPURegister rd) {
   2584     VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode);
   2585     return rd.GetCode() << Rd_offset;
   2586   }
   2587 
   2588   static Instr Rn(CPURegister rn) {
   2589     VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode);
   2590     return rn.GetCode() << Rn_offset;
   2591   }
   2592 
   2593   static Instr Rm(CPURegister rm) {
   2594     VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
   2595     return rm.GetCode() << Rm_offset;
   2596   }
   2597 
   2598   static Instr RmNot31(CPURegister rm) {
   2599     VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
   2600     VIXL_ASSERT(!rm.IsZero());
   2601     return Rm(rm);
   2602   }
   2603 
   2604   static Instr Ra(CPURegister ra) {
   2605     VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode);
   2606     return ra.GetCode() << Ra_offset;
   2607   }
   2608 
   2609   static Instr Rt(CPURegister rt) {
   2610     VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode);
   2611     return rt.GetCode() << Rt_offset;
   2612   }
   2613 
   2614   static Instr Rt2(CPURegister rt2) {
   2615     VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode);
   2616     return rt2.GetCode() << Rt2_offset;
   2617   }
   2618 
   2619   static Instr Rs(CPURegister rs) {
   2620     VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode);
   2621     return rs.GetCode() << Rs_offset;
   2622   }
   2623 
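  // These helpers compose into raw encodings. A sketch, where `opcode` stands
  // in for an instruction-specific constant (illustrative):
  //   Instr insn = opcode | Assembler::Rd(x0) | Assembler::Rn(x1) |
  //                Assembler::Rm(x2);
  //   masm.dci(insn);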
   2624   // These encoding functions allow the stack pointer to be encoded, and
   2625   // disallow the zero register.
   2626   static Instr RdSP(Register rd) {
   2627     VIXL_ASSERT(!rd.IsZero());
   2628     return (rd.GetCode() & kRegCodeMask) << Rd_offset;
   2629   }
   2630 
   2631   static Instr RnSP(Register rn) {
   2632     VIXL_ASSERT(!rn.IsZero());
   2633     return (rn.GetCode() & kRegCodeMask) << Rn_offset;
   2634   }
   2635 
   2636   // Flags encoding.
   2637   static Instr Flags(FlagsUpdate S) {
   2638     if (S == SetFlags) {
   2639       return 1 << FlagsUpdate_offset;
   2640     } else if (S == LeaveFlags) {
   2641       return 0 << FlagsUpdate_offset;
   2642     }
   2643     VIXL_UNREACHABLE();
   2644     return 0;
   2645   }
   2646 
   2647   static Instr Cond(Condition cond) { return cond << Condition_offset; }
   2648 
   2649   // PC-relative address encoding.
   2650   static Instr ImmPCRelAddress(int64_t imm21) {
   2651     VIXL_ASSERT(IsInt21(imm21));
   2652     Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
   2653     Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
   2654     Instr immlo = imm << ImmPCRelLo_offset;
   2655     return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
   2656   }
   2657 
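  // For example, imm21 = 0x1001 splits into immlo = 0x1 (bits 1:0) and
  // immhi = 0x400 (bits 20:2), matching the ADR operand layout.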
   2658   // Branch encoding.
   2659   static Instr ImmUncondBranch(int64_t imm26) {
   2660     VIXL_ASSERT(IsInt26(imm26));
   2661     return TruncateToUint26(imm26) << ImmUncondBranch_offset;
   2662   }
   2663 
   2664   static Instr ImmCondBranch(int64_t imm19) {
   2665     VIXL_ASSERT(IsInt19(imm19));
   2666     return TruncateToUint19(imm19) << ImmCondBranch_offset;
   2667   }
   2668 
   2669   static Instr ImmCmpBranch(int64_t imm19) {
   2670     VIXL_ASSERT(IsInt19(imm19));
   2671     return TruncateToUint19(imm19) << ImmCmpBranch_offset;
   2672   }
   2673 
   2674   static Instr ImmTestBranch(int64_t imm14) {
   2675     VIXL_ASSERT(IsInt14(imm14));
   2676     return TruncateToUint14(imm14) << ImmTestBranch_offset;
   2677   }
   2678 
   2679   static Instr ImmTestBranchBit(unsigned bit_pos) {
   2680     VIXL_ASSERT(IsUint6(bit_pos));
   2681     // Subtract five from the shift offset, as we need bit 5 from bit_pos.
   2682     unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
   2683     unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
   2684     b5 &= ImmTestBranchBit5_mask;
   2685     b40 &= ImmTestBranchBit40_mask;
   2686     return b5 | b40;
   2687   }
   2688 
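  // For example, bit_pos = 33 (0b100001) encodes as b5 = 1 and b40 = 0b00001.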
   2689   // Data Processing encoding.
   2690   static Instr SF(Register rd) {
   2691     return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
   2692   }
   2693 
   2694   static Instr ImmAddSub(int imm) {
   2695     VIXL_ASSERT(IsImmAddSub(imm));
   2696     if (IsUint12(imm)) {  // No shift required.
   2697       imm <<= ImmAddSub_offset;
   2698     } else {
   2699       imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
   2700     }
   2701     return imm;
   2702   }
   2703 
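  // For example, ImmAddSub(0xfff) encodes the immediate directly, while
  // ImmAddSub(0x1000) encodes 0x1 with the 12-bit shift flag set.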
  static Instr ImmS(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
                ((reg_size == kWRegSize) && IsUint5(imms)));
    USE(reg_size);
    return imms << ImmS_offset;
  }

  static Instr ImmR(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    VIXL_ASSERT(IsUint6(immr));
    return immr << ImmR_offset;
  }

  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
    USE(reg_size);
    return imms << ImmSetBits_offset;
  }

  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    return immr << ImmRotate_offset;
  }

  static Instr ImmLLiteral(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmLLiteral_offset;
  }

  static Instr BitN(unsigned bitn, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
    USE(reg_size);
    return bitn << BitN_offset;
  }

  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(IsUint6(amount));
    return amount << ImmDPShift_offset;
  }

  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }

  // MemOperand offset encoding.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  static Instr ImmLSPair(int64_t imm7, unsigned access_size) {
    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size));
    int64_t scaled_imm7 = imm7 / (1 << access_size);
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

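  // A worked example (illustrative): access_size is log2 of the access size
  // in bytes, so for an X-register pair (access_size = 3) a byte offset of
  // 16 is scaled to a stored imm7 of 16 / 8 = 2.
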
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }

  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  static Instr ImmSystemRegister(int imm15) {
    VIXL_ASSERT(IsUint15(imm15));
    return imm15 << ImmSystemRegister_offset;
  }

  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }

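  // An illustrative example: a move such as movz x0, #0xbeef, lsl #16 is
  // encoded with ImmMoveWide(0xbeef) and ShiftMoveWide(1), the shift operand
  // counting 16-bit units (the hw field).
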
  // FP Immediates.
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);

  // FP register type.
  static Instr FPType(FPRegister fd) { return fd.Is64Bits() ? FP64 : FP32; }

  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(IsUint6(scale));
    return scale << FPScale_offset;
  }

  // Immediate field checking helpers.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);

  // Instruction bits for vector format in data processing operations.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

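  // For example (illustrative): an 8-lane, 64-bit register maps to NEON_8B
  // and its 16-lane, 128-bit counterpart to NEON_16B; 0xffffffff marks a
  // lane/size combination that should be unreachable.
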
  // Instruction bits for vector format in floating point data processing
  // operations.
  static Instr FPFormat(VRegister vd) {
    if (vd.GetLanes() == 1) {
      // Floating point scalar formats.
      VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
      return vd.Is64Bits() ? FP64 : FP32;
    }

    // Two lane floating point vector formats.
    if (vd.GetLanes() == 2) {
      VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
      return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
    }

    // Four lane floating point vector format.
    VIXL_ASSERT((vd.GetLanes() == 4) && vd.Is128Bits());
    return NEON_FP_4S;
  }

  // Instruction bits for vector format in load and store operations.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for scalar format in data processing operations.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.GetLanes() == 1);
    switch (vd.GetSizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        return 0xffffffff;
    }
  }

  static Instr ImmNEONHLM(int index, int num_bits) {
    int h, l, m;
    if (num_bits == 3) {
      VIXL_ASSERT(IsUint3(index));
      h = (index >> 2) & 1;
      l = (index >> 1) & 1;
      m = (index >> 0) & 1;
    } else if (num_bits == 2) {
      VIXL_ASSERT(IsUint2(index));
      h = (index >> 1) & 1;
      l = (index >> 0) & 1;
      m = 0;
    } else {
      VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
      h = (index >> 0) & 1;
      l = 0;
      m = 0;
    }
    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
  }

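  // A worked example (illustrative): for a three-bit lane index of 5
  // (0b101), h = 1, l = 0 and m = 1, placing the index bits in the
  // instruction's H, L and M positions.
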
  static Instr ImmNEONExt(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << ImmNEONExt_offset;
  }

  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

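  // A worked example (illustrative): for a B-sized lane (s = 0) at index 3,
  // imm5 = (3 << 1) | 1 = 0b00111. The lowest set bit marks the lane size
  // and the bits above it hold the index.
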
  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }

  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(IsUint8(imm8));
    Instr instr;
    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }

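  // A worked example (illustrative): for imm8 = 0xab (0b10101011), the top
  // three bits (abc = 0b101) and the low five bits (defgh = 0b01011) are
  // written to their two separate fields.
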
  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }

  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }

  // Size of the code generated between the label and the current position.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }

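  // A usage sketch (illustrative, assuming bind() and nop() declared
  // elsewhere in this class, with "assm" a hypothetical Assembler):
  //
  //   Label start;
  //   assm.bind(&start);
  //   assm.nop();
  //   assm.GetSizeOfCodeGeneratedSince(&start);  // == kInstructionSize.
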
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }

  bool AllowPageOffsetDependentCode() const {
    return (GetPic() == PageOffsetDependentCode) ||
           (GetPic() == PositionDependentCode);
  }

  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }

 protected:
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // TODO(all): The third parameter should be passed by reference, but gcc
  // 4.8.2 then reports a bogus uninitialised warning.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);


  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);


 private:
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

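  // Presumably (an inference, not stated here): element_shift converts the
  // byte offset into the encoding's unit, e.g. 0 for byte offsets, 2 for
  // instruction (4-byte) offsets and 12 for 4KB page offsets.
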
  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);

  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }

  PositionIndependentCodeOption pic_;
};


template <typename T>
void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
  return UpdateValue(new_value,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


template <typename T>
void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
  return UpdateValue(high64,
                     low64,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


}  // namespace aarch64

// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file. Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS

}  // namespace vixl

#endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_