// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
#define VIXL_AARCH64_ASSEMBLER_AARCH64_H_

#include "../assembler-base-vixl.h"
#include "../code-generation-scopes-vixl.h"
#include "../globals-vixl.h"
#include "../invalset-vixl.h"
#include "../utils-vixl.h"

#include "operands-aarch64.h"

namespace vixl {
namespace aarch64 {

class LabelTestHelper;  // Forward declaration.


class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  bool IsBound() const { return location_ >= 0; }
  bool IsLinked() const { return !links_.empty(); }

  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, i.e. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

// It is not safe to copy labels, so disable the copy constructor and operator
// by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};


class Assembler;
class LiteralPool;

// A literal is a 32-bit, 64-bit or 128-bit piece of data stored in the
// instruction stream and loaded through a PC-relative load. The same literal
// can be referred to by multiple instructions, but a literal can only reside
// at one place in memory. A literal can be used by a load before or after
// being placed in memory.
//
// Internally, an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//  1) the literal is placed: the offset (stored as offset + 1) is used to
//     resolve any subsequent load using the literal.
//  2) the literal is not placed: offset is the offset of the last load using
//     the literal (stored as -offset - 1). If multiple loads refer to this
//     literal then the last load holds the offset of the preceding load and
//     all loads form a chain. Once the literal is placed, all the loads in
//     the chain are resolved and future loads fall back to possibility 1.
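//
// A minimal sketch of this encoding, mirroring the fields and accessors of
// RawLiteral below (illustrative only):
//
//   ptrdiff_t offset_ = 0;                       // Neither used nor placed.
//   bool IsUsed() const { return offset_ < 0; }
//   bool IsPlaced() const { return offset_ > 0; }
//   void SetOffset(ptrdiff_t o) { offset_ = o + 1; }    // Placed at `o`.
//   void SetLastUse(ptrdiff_t o) { offset_ = -o - 1; }  // Last load at `o`.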
class RawLiteral {
 public:
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};


template <typename T>
class Literal : public RawLiteral {
 public:
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  Literal(T high64,
          T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}

  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows its
  // offset from there. This also allows patching the value after the code has
  // been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (GetSize()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
            GetRawValue32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
            GetRawValue64();
        break;
      default:
        VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
        *base_address = GetRawValue128Low64();
        *(base_address + 1) = GetRawValue128High64();
    }
  }
};
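
// For example, a 64-bit literal can be created, used and placed as follows
// (an illustrative sketch; `masm` stands for an Assembler, and the literal
// must eventually be placed, here explicitly via `place()`):
//
//   Literal<uint64_t> forty_two(42);
//   masm.ldr(x0, &forty_two);   // PC-relative load; may precede placement.
//   ...
//   masm.place(&forty_two);     // Emit the literal data into the buffer.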


// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};


// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
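
// For instance, with the default PreferScaledOffset option an immediate
// offset that fits the scaled form is encoded as a scaled-immediate-offset
// access, while the ldur-style instructions below default to
// PreferUnscaledOffset (an illustrative sketch; `masm` stands for an
// Assembler):
//
//   masm.ldr(x0, MemOperand(x1, 8));    // Scaled-offset form preferred.
//   masm.ldur(x0, MemOperand(x1, 1));   // Unscaled-offset form preferred.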


// Assembler.
class Assembler : public vixl::internal::AssemblerBase {
 public:
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic) {}
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity), pic_(pic) {}
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity), pic_(pic) {}

  // Upon destruction, the code will assert that one of the following is true:
  //  * The Assembler object has not been used.
  //  * Nothing has been emitted since the last Reset() call.
  //  * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}

  // System functions.

  // Start generating code from the beginning of the buffer, discarding any code
  // and data that has already been emitted into the buffer.
  void Reset();

  // Label.
  // Bind a label to the current PC.
  void bind(Label* label);

  // Bind a label to a specified offset from the start of the buffer.
  void BindToOffset(Label* label, ptrdiff_t offset);

  // Place a literal at the current PC.
  void place(RawLiteral* literal);
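
  // For example, a backward branch can be assembled as follows (an
  // illustrative sketch; `masm` stands for an Assembler):
  //
  //   Label loop;
  //   masm.bind(&loop);        // Bind `loop` to the current PC.
  //   ...
  //   masm.cbnz(x0, &loop);    // Branch back while x0 is not zero.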

  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }

  // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }
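
  // For example (illustrative), once a label has been bound:
  //
  //   Label entry;
  //   masm.bind(&entry);
  //   ...
  //   uintptr_t addr = masm.GetLabelAddress<uintptr_t>(&entry);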

  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
  VIXL_DEPRECATED("GetInstructionAt",
                  Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
    return GetInstructionAt(instruction_offset);
  }

  ptrdiff_t GetInstructionOffset(Instruction* instruction) {
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset =
        instruction - GetBuffer()->GetStartAddress<Instruction*>();
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
    return offset;
  }
  VIXL_DEPRECATED("GetInstructionOffset",
                  ptrdiff_t InstructionOffset(Instruction* instruction)) {
    return GetInstructionOffset(instruction);
  }

  // Instruction set functions.

  // Branch / Jump instructions.
  // Branch to register.
  void br(const Register& xn);

  // Branch with link to register.
  void blr(const Register& xn);

  // Branch to register with return hint.
  void ret(const Register& xn = lr);

  // Unconditional branch to label.
  void b(Label* label);

  // Conditional branch to label.
  void b(Label* label, Condition cond);

  // Unconditional branch to PC offset.
  void b(int64_t imm26);

  // Conditional branch to PC offset.
  void b(int64_t imm19, Condition cond);

  // Branch with link to label.
  void bl(Label* label);

  // Branch with link to PC offset.
  void bl(int64_t imm26);

  // Compare and branch to label if zero.
  void cbz(const Register& rt, Label* label);

  // Compare and branch to PC offset if zero.
  void cbz(const Register& rt, int64_t imm19);

  // Compare and branch to label if not zero.
  void cbnz(const Register& rt, Label* label);

  // Compare and branch to PC offset if not zero.
  void cbnz(const Register& rt, int64_t imm19);

  // Table lookup from one register.
  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup from two registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup from three registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup from four registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Table lookup extension from one register.
  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup extension from two registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup extension from three registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup extension from four registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Test bit and branch to label if zero.
  void tbz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if zero.
  void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Test bit and branch to label if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Address calculation instructions.
  // Calculate a PC-relative address. Unlike for branches, the offset in adr is
  // unscaled (i.e. the result can be unaligned).

  // Calculate the address of a label.
  void adr(const Register& xd, Label* label);

  // Calculate the address of a PC offset.
  void adr(const Register& xd, int64_t imm21);

  // Calculate the page address of a label.
  void adrp(const Register& xd, Label* label);

  // Calculate the page address of a PC offset.
  void adrp(const Register& xd, int64_t imm21);
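
  // For example (illustrative), the address of a label can be computed
  // without a load:
  //
  //   Label data;
  //   masm.adr(x0, &data);     // x0 = address of `data` once it is bound.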

  // Data Processing instructions.
  // Add.
  void add(const Register& rd, const Register& rn, const Operand& operand);

  // Add and update status flags.
  void adds(const Register& rd, const Register& rn, const Operand& operand);

  // Compare negative.
  void cmn(const Register& rn, const Operand& operand);

  // Subtract.
  void sub(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract and update status flags.
  void subs(const Register& rd, const Register& rn, const Operand& operand);

  // Compare.
  void cmp(const Register& rn, const Operand& operand);

  // Negate.
  void neg(const Register& rd, const Operand& operand);

  // Negate and update status flags.
  void negs(const Register& rd, const Operand& operand);

  // Add with carry bit.
  void adc(const Register& rd, const Register& rn, const Operand& operand);

  // Add with carry bit and update status flags.
  void adcs(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit.
  void sbc(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit and update status flags.
  void sbcs(const Register& rd, const Register& rn, const Operand& operand);

  // Negate with carry bit.
  void ngc(const Register& rd, const Operand& operand);

  // Negate with carry bit and update status flags.
  void ngcs(const Register& rd, const Operand& operand);

  // Logical instructions.
  // Bitwise and (A & B).
  void and_(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise and (A & B) and update status flags.
  void ands(const Register& rd, const Register& rn, const Operand& operand);

  // Bit test and set flags.
  void tst(const Register& rn, const Operand& operand);

  // Bit clear (A & ~B).
  void bic(const Register& rd, const Register& rn, const Operand& operand);

  // Bit clear (A & ~B) and update status flags.
  void bics(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or (A | B).
  void orr(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or-not (A | ~B).
  void orn(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor/xor (A ^ B).
  void eor(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eon/xnor (A ^ ~B).
  void eon(const Register& rd, const Register& rn, const Operand& operand);

  // Logical shift left by variable.
  void lslv(const Register& rd, const Register& rn, const Register& rm);

  // Logical shift right by variable.
  void lsrv(const Register& rd, const Register& rn, const Register& rm);

  // Arithmetic shift right by variable.
  void asrv(const Register& rd, const Register& rn, const Register& rm);

  // Rotate right by variable.
  void rorv(const Register& rd, const Register& rn, const Register& rm);

  // Bitfield instructions.
  // Bitfield move.
  void bfm(const Register& rd,
           const Register& rn,
           unsigned immr,
           unsigned imms);

  // Signed bitfield move.
  void sbfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Unsigned bitfield move.
  void ubfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Bfm aliases.
  // Bitfield insert.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd,
        rn,
        (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
        width - 1);
  }

  // Bitfield extract and insert low.
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd, rn, lsb, lsb + width - 1);
  }
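
  // For example (illustrative): bfi(w0, w1, 8, 4) copies w1<3:0> into
  // w0<11:8>, and per the computation above is encoded as bfm(w0, w1, 24, 3).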

  // Sbfm aliases.
  // Arithmetic shift right.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Signed bitfield insert with zero at right.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Signed bitfield extract.
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd, rn, lsb, lsb + width - 1);
  }

  // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }

  // Ubfm aliases.
  // Logical shift left.
  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    unsigned reg_size = rd.GetSizeInBits();
    VIXL_ASSERT(shift < reg_size);
    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
  }
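
  // For example (illustrative): lsl(w0, w1, 4) is encoded as
  // ubfm(w0, w1, 28, 27), per the computation above.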

  // Logical shift right.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Unsigned bitfield insert with zero at right.
  void ubfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Unsigned bitfield extract.
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd, rn, lsb, lsb + width - 1);
  }

  // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }

  // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }

  // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }

  // Extract.
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);
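
  // For example, a boolean result can be materialised as follows (an
  // illustrative sketch; `masm` stands for an Assembler):
  //
  //   masm.cmp(w0, w1);
  //   masm.cset(w2, eq);       // w2 = (w0 == w1) ? 1 : 0.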

  // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }

  // Conditional comparison.
  // Conditional compare negative.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);

  // Multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& xd, const Register& wn, const Register& wm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    umaddl(xd, wn, wm, xzr);
  }

  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd, const Register& xn, const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& xd, const Register& xn);

  // Reverse bytes.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits.
  void cls(const Register& rd, const Register& rn);

  // Memory instructions.
  // Load integer or FP register.
  void ldr(const CPURegister& rt,
           const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt,
           const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& xt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
  void ldrb(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store byte.
  void strb(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte with sign extension.
  void ldrsb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word.
  void ldrh(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store half-word.
  void strh(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load integer or FP register (with unscaled offset).
  void ldur(const CPURegister& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store integer or FP register (with unscaled offset).
  void stur(const CPURegister& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load word with sign extension.
  void ldursw(const Register& xt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte (with unscaled offset).
  void ldurb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store byte (with unscaled offset).
  void sturb(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte with sign extension (and unscaled offset).
  void ldursb(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word (with unscaled offset).
  void ldurh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store half-word (with unscaled offset).
  void sturh(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word with sign extension (and unscaled offset).
  void ldursh(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);

  // Load integer or FP register pair, non-temporal.
  void ldnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& src);

  // Store integer or FP register pair, non-temporal.
  void stnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& dst);

  // Load integer or FP register from literal pool.
  void ldr(const CPURegister& rt, RawLiteral* literal);

  // Load word with sign extension from literal pool.
  void ldrsw(const Register& xt, RawLiteral* literal);

  // Load integer or FP register from pc + imm19 << 2.
  void ldr(const CPURegister& rt, int64_t imm19);

  // Load word with sign extension from pc + imm19 << 2.
  void ldrsw(const Register& xt, int64_t imm19);

  // Store exclusive byte.
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive half-word.
  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive register.
  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load exclusive byte.
  void ldxrb(const Register& rt, const MemOperand& src);

  // Load exclusive half-word.
  void ldxrh(const Register& rt, const MemOperand& src);

  // Load exclusive register.
  void ldxr(const Register& rt, const MemOperand& src);

  // Store exclusive register pair.
  void stxp(const Register& rs,
            const Register& rt,
            const Register& rt2,
            const MemOperand& dst);

  // Load exclusive register pair.
  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
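
  // The exclusive-access instructions are typically used in a retry loop, for
  // example an atomic increment (an illustrative sketch; `masm` stands for an
  // Assembler and x1 holds the address):
  //
  //   Label retry;
  //   masm.bind(&retry);
  //   masm.ldxr(w0, MemOperand(x1));      // Load exclusive.
  //   masm.add(w0, w0, 1);
  //   masm.stxr(w2, w0, MemOperand(x1));  // w2 == 0 on success.
  //   masm.cbnz(w2, &retry);              // Retry if the store failed.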

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive register.
  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive register.
  void ldaxr(const Register& rt, const MemOperand& src);

  // Store-release exclusive register pair.
  void stlxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
             const MemOperand& dst);

  // Load-acquire exclusive register pair.
  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release byte.
  void stlrb(const Register& rt, const MemOperand& dst);

  // Store-release half-word.
  void stlrh(const Register& rt, const MemOperand& dst);

  // Store-release register.
  void stlr(const Register& rt, const MemOperand& dst);

  // Load-acquire byte.
  void ldarb(const Register& rt, const MemOperand& src);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const MemOperand& src);

  // Load-acquire register.
  void ldar(const Register& rt, const MemOperand& src);

  // Prefetch memory.
  void prfm(PrefetchOperation op,
            const MemOperand& addr,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Prefetch memory (with unscaled offset).
  void prfum(PrefetchOperation op,
             const MemOperand& addr,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Prefetch memory in the literal pool.
  void prfm(PrefetchOperation op, RawLiteral* literal);

  // Prefetch from pc + imm19 << 2.
  void prfm(PrefetchOperation op, int64_t imm19);

  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // such that the result equals the 64-bit immediate argument. If an explicit
  // left shift is specified (0, 16, 32 or 48), the immediate must be a 16-bit
  // value.
  //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.
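  //
  // For example (illustrative; `masm` stands for an Assembler):
  //   masm.movz(x0, 0x1234, 32);  // x0 = 0x0000123400000000.
  //   masm.movk(x0, 0xabcd);      // Shift inferred as 0:
  //                               // x0 = 0x000012340000abcd.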

  // Move immediate and keep.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }

  // Move inverted immediate.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }

  // Move immediate.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }

  // Misc instructions.
  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Generate exception targeting EL1.
  void svc(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move inverted operand to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.
  // Move to register from system register.
  void mrs(const Register& xt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& xt);

  // System instruction.
  void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);

  // System instruction with pre-encoded op (op1:crn:crm:op2).
  void sys(int op, const Register& xt = xzr);

  // System data cache operation.
  void dc(DataCacheOp op, const Register& rt);

  // System instruction cache operation.
  void ic(InstructionCacheOp op, const Register& rt);

  // System hint.
  void hint(SystemHint code);

  // Clear exclusive monitor.
  void clrex(int imm4 = 0xf);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();
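
  // For example (illustrative, assuming the usual VIXL BarrierDomain and
  // BarrierType enumerators), a full data memory barrier for the inner
  // shareable domain, i.e. `dmb ish`:
  //
  //   masm.dmb(InnerShareable, BarrierAll);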
   1266 
   1267   // Alias for system instructions.
   1268   // No-op.
   1269   void nop() { hint(NOP); }
   1270 
   1271   // FP and NEON instructions.
   1272   // Move double precision immediate to FP register.
   1273   void fmov(const VRegister& vd, double imm);
   1274 
   1275   // Move single precision immediate to FP register.
   1276   void fmov(const VRegister& vd, float imm);
   1277 
   1278   // Move FP register to register.
   1279   void fmov(const Register& rd, const VRegister& fn);
   1280 
   1281   // Move register to FP register.
   1282   void fmov(const VRegister& vd, const Register& rn);
   1283 
   1284   // Move FP register to FP register.
   1285   void fmov(const VRegister& vd, const VRegister& fn);
   1286 
   1287   // Move 64-bit register to top half of 128-bit FP register.
   1288   void fmov(const VRegister& vd, int index, const Register& rn);
   1289 
   1290   // Move top half of 128-bit FP register to 64-bit register.
   1291   void fmov(const Register& rd, const VRegister& vn, int index);
   1292 
   1293   // FP add.
   1294   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1295 
   1296   // FP subtract.
   1297   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1298 
   1299   // FP multiply.
   1300   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1301 
   1302   // FP fused multiply-add.
   1303   void fmadd(const VRegister& vd,
   1304              const VRegister& vn,
   1305              const VRegister& vm,
   1306              const VRegister& va);
   1307 
   1308   // FP fused multiply-subtract.
   1309   void fmsub(const VRegister& vd,
   1310              const VRegister& vn,
   1311              const VRegister& vm,
   1312              const VRegister& va);
   1313 
   1314   // FP fused multiply-add and negate.
   1315   void fnmadd(const VRegister& vd,
   1316               const VRegister& vn,
   1317               const VRegister& vm,
   1318               const VRegister& va);
   1319 
   1320   // FP fused multiply-subtract and negate.
   1321   void fnmsub(const VRegister& vd,
   1322               const VRegister& vn,
   1323               const VRegister& vm,
   1324               const VRegister& va);
   1325 
   1326   // FP multiply-negate scalar.
   1327   void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1328 
   1329   // FP reciprocal exponent scalar.
   1330   void frecpx(const VRegister& vd, const VRegister& vn);
   1331 
   1332   // FP divide.
   1333   void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1334 
   1335   // FP maximum.
   1336   void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1337 
   1338   // FP minimum.
   1339   void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1340 
   1341   // FP maximum number.
   1342   void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1343 
   1344   // FP minimum number.
   1345   void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1346 
   1347   // FP absolute.
   1348   void fabs(const VRegister& vd, const VRegister& vn);
   1349 
   1350   // FP negate.
   1351   void fneg(const VRegister& vd, const VRegister& vn);
   1352 
   1353   // FP square root.
   1354   void fsqrt(const VRegister& vd, const VRegister& vn);
   1355 
   1356   // FP round to integer, nearest with ties to away.
   1357   void frinta(const VRegister& vd, const VRegister& vn);
   1358 
   1359   // FP round to integer, implicit rounding.
   1360   void frinti(const VRegister& vd, const VRegister& vn);
   1361 
   1362   // FP round to integer, toward minus infinity.
   1363   void frintm(const VRegister& vd, const VRegister& vn);
   1364 
   1365   // FP round to integer, nearest with ties to even.
   1366   void frintn(const VRegister& vd, const VRegister& vn);
   1367 
   1368   // FP round to integer, toward plus infinity.
   1369   void frintp(const VRegister& vd, const VRegister& vn);
   1370 
   1371   // FP round to integer, exact, implicit rounding.
   1372   void frintx(const VRegister& vd, const VRegister& vn);
   1373 
   1374   // FP round to integer, towards zero.
   1375   void frintz(const VRegister& vd, const VRegister& vn);
   1376 
   1377   void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);
   1378 
  // Shared emission helper for the register-register fcmp/fcmpe forms.
  void FPCompareMacro(const VRegister& vn,
                      const VRegister& vm,
                      FPTrapFlags trap);
   1382 
   1383   // FP compare registers.
   1384   void fcmp(const VRegister& vn, const VRegister& vm);
   1385 
   1386   // FP compare immediate.
   1387   void fcmp(const VRegister& vn, double value);
   1388 
  // Shared emission helper for the fccmp/fccmpe forms below.
  void FPCCompareMacro(const VRegister& vn,
                       const VRegister& vm,
                       StatusFlags nzcv,
                       Condition cond,
                       FPTrapFlags trap);
   1394 
   1395   // FP conditional compare.
   1396   void fccmp(const VRegister& vn,
   1397              const VRegister& vm,
   1398              StatusFlags nzcv,
   1399              Condition cond);
   1400 
   1401   // FP signaling compare registers.
   1402   void fcmpe(const VRegister& vn, const VRegister& vm);
   1403 
   1404   // FP signaling compare immediate.
   1405   void fcmpe(const VRegister& vn, double value);
   1406 
   1407   // FP conditional signaling compare.
   1408   void fccmpe(const VRegister& vn,
   1409               const VRegister& vm,
   1410               StatusFlags nzcv,
   1411               Condition cond);
   1412 
   1413   // FP conditional select.
   1414   void fcsel(const VRegister& vd,
   1415              const VRegister& vn,
   1416              const VRegister& vm,
   1417              Condition cond);
   1418 
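  // fcmp and fcsel combine to select between registers without a branch. A
  // minimal sketch, assuming an Assembler `masm`:
  //
  //   masm.fcmp(d1, d2);           // Set NZCV from the FP comparison.
  //   masm.fcsel(d0, d1, d2, mi);  // d0 = (d1 < d2) ? d1 : d2; an
  //                                // unordered comparison selects d2.
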
  // Common FP convert functions.
   1420   void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
   1421   void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
   1422 
   1423   // FP convert between precisions.
   1424   void fcvt(const VRegister& vd, const VRegister& vn);
   1425 
   1426   // FP convert to higher precision.
   1427   void fcvtl(const VRegister& vd, const VRegister& vn);
   1428 
   1429   // FP convert to higher precision (second part).
   1430   void fcvtl2(const VRegister& vd, const VRegister& vn);
   1431 
   1432   // FP convert to lower precision.
   1433   void fcvtn(const VRegister& vd, const VRegister& vn);
   1434 
  // FP convert to lower precision (second part).
   1436   void fcvtn2(const VRegister& vd, const VRegister& vn);
   1437 
   1438   // FP convert to lower precision, rounding to odd.
   1439   void fcvtxn(const VRegister& vd, const VRegister& vn);
   1440 
   1441   // FP convert to lower precision, rounding to odd (second part).
   1442   void fcvtxn2(const VRegister& vd, const VRegister& vn);
   1443 
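  // The widening and narrowing conversions pair up, with the "2" variants
  // addressing the upper half of a 128-bit register. For example (A64
  // syntax), widening eight half-precision values to single precision:
  //
  //   fcvtl  v0.4s, v1.4h   // lower four halfwords -> singles
  //   fcvtl2 v2.4s, v1.8h   // upper four halfwords -> singles
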
   1444   // FP convert to signed integer, nearest with ties to away.
   1445   void fcvtas(const Register& rd, const VRegister& vn);
   1446 
   1447   // FP convert to unsigned integer, nearest with ties to away.
   1448   void fcvtau(const Register& rd, const VRegister& vn);
   1449 
   1450   // FP convert to signed integer, nearest with ties to away.
   1451   void fcvtas(const VRegister& vd, const VRegister& vn);
   1452 
   1453   // FP convert to unsigned integer, nearest with ties to away.
   1454   void fcvtau(const VRegister& vd, const VRegister& vn);
   1455 
   1456   // FP convert to signed integer, round towards -infinity.
   1457   void fcvtms(const Register& rd, const VRegister& vn);
   1458 
   1459   // FP convert to unsigned integer, round towards -infinity.
   1460   void fcvtmu(const Register& rd, const VRegister& vn);
   1461 
   1462   // FP convert to signed integer, round towards -infinity.
   1463   void fcvtms(const VRegister& vd, const VRegister& vn);
   1464 
   1465   // FP convert to unsigned integer, round towards -infinity.
   1466   void fcvtmu(const VRegister& vd, const VRegister& vn);
   1467 
   1468   // FP convert to signed integer, nearest with ties to even.
   1469   void fcvtns(const Register& rd, const VRegister& vn);
   1470 
   1471   // FP convert to unsigned integer, nearest with ties to even.
   1472   void fcvtnu(const Register& rd, const VRegister& vn);
   1473 
   1474   // FP convert to signed integer, nearest with ties to even.
  void fcvtns(const VRegister& vd, const VRegister& vn);
   1476 
   1477   // FP convert to unsigned integer, nearest with ties to even.
  void fcvtnu(const VRegister& vd, const VRegister& vn);
   1479 
   1480   // FP convert to signed integer or fixed-point, round towards zero.
   1481   void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
   1482 
   1483   // FP convert to unsigned integer or fixed-point, round towards zero.
   1484   void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
   1485 
   1486   // FP convert to signed integer or fixed-point, round towards zero.
   1487   void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
   1488 
   1489   // FP convert to unsigned integer or fixed-point, round towards zero.
   1490   void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
   1491 
   1492   // FP convert to signed integer, round towards +infinity.
   1493   void fcvtps(const Register& rd, const VRegister& vn);
   1494 
   1495   // FP convert to unsigned integer, round towards +infinity.
   1496   void fcvtpu(const Register& rd, const VRegister& vn);
   1497 
   1498   // FP convert to signed integer, round towards +infinity.
   1499   void fcvtps(const VRegister& vd, const VRegister& vn);
   1500 
   1501   // FP convert to unsigned integer, round towards +infinity.
   1502   void fcvtpu(const VRegister& vd, const VRegister& vn);
   1503 
  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& vd, const Register& rn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& vd, const Register& rn, int fbits = 0);

  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& vd, const VRegister& vn, int fbits = 0);
   1515 
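  // A non-zero fbits selects a fixed-point interpretation: fcvtzs/fcvtzu
  // scale by 2^fbits before truncating, while scvtf/ucvtf divide by 2^fbits
  // after converting. Illustrative examples, assuming an Assembler `masm`:
  //
  //   masm.fcvtzs(w0, s1, 4);  // 2.75f -> 44 (2.75 * 16, truncated)
  //   masm.scvtf(s0, w1, 8);   // 512 -> 2.0f (512 / 256)
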
   1516   // Unsigned absolute difference.
   1517   void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1518 
   1519   // Signed absolute difference.
   1520   void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1521 
   1522   // Unsigned absolute difference and accumulate.
   1523   void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1524 
   1525   // Signed absolute difference and accumulate.
   1526   void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1527 
   1528   // Add.
   1529   void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1530 
   1531   // Subtract.
   1532   void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1533 
   1534   // Unsigned halving add.
   1535   void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1536 
   1537   // Signed halving add.
   1538   void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1539 
   1540   // Unsigned rounding halving add.
   1541   void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1542 
   1543   // Signed rounding halving add.
   1544   void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1545 
   1546   // Unsigned halving sub.
   1547   void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1548 
   1549   // Signed halving sub.
   1550   void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1551 
   1552   // Unsigned saturating add.
   1553   void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1554 
   1555   // Signed saturating add.
   1556   void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1557 
   1558   // Unsigned saturating subtract.
   1559   void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1560 
   1561   // Signed saturating subtract.
   1562   void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1563 
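  // The saturating forms clamp at the type bounds instead of wrapping. For
  // example, on byte lanes with every element of v1 at 0x7f and of v2 at
  // 0x01 (A64 syntax):
  //
  //   add   v0.8b, v1.8b, v2.8b   // 0x80: wraps to -128
  //   sqadd v0.8b, v1.8b, v2.8b   // 0x7f: clamps at +127
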
   1564   // Add pairwise.
   1565   void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1566 
   1567   // Add pair of elements scalar.
   1568   void addp(const VRegister& vd, const VRegister& vn);
   1569 
   1570   // Multiply-add to accumulator.
   1571   void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1572 
   1573   // Multiply-subtract to accumulator.
   1574   void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1575 
   1576   // Multiply.
   1577   void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1578 
   1579   // Multiply by scalar element.
   1580   void mul(const VRegister& vd,
   1581            const VRegister& vn,
   1582            const VRegister& vm,
   1583            int vm_index);
   1584 
   1585   // Multiply-add by scalar element.
   1586   void mla(const VRegister& vd,
   1587            const VRegister& vn,
   1588            const VRegister& vm,
   1589            int vm_index);
   1590 
   1591   // Multiply-subtract by scalar element.
   1592   void mls(const VRegister& vd,
   1593            const VRegister& vn,
   1594            const VRegister& vm,
   1595            int vm_index);
   1596 
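  // The by-element forms broadcast a single lane of vm across the
  // multiplication. For example (A64 syntax):
  //
  //   mla v0.4s, v1.4s, v2.s[3]   // v0.4s += v1.4s * v2.s[3]
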
   1597   // Signed long multiply-add by scalar element.
   1598   void smlal(const VRegister& vd,
   1599              const VRegister& vn,
   1600              const VRegister& vm,
   1601              int vm_index);
   1602 
   1603   // Signed long multiply-add by scalar element (second part).
   1604   void smlal2(const VRegister& vd,
   1605               const VRegister& vn,
   1606               const VRegister& vm,
   1607               int vm_index);
   1608 
   1609   // Unsigned long multiply-add by scalar element.
   1610   void umlal(const VRegister& vd,
   1611              const VRegister& vn,
   1612              const VRegister& vm,
   1613              int vm_index);
   1614 
   1615   // Unsigned long multiply-add by scalar element (second part).
   1616   void umlal2(const VRegister& vd,
   1617               const VRegister& vn,
   1618               const VRegister& vm,
   1619               int vm_index);
   1620 
   1621   // Signed long multiply-sub by scalar element.
   1622   void smlsl(const VRegister& vd,
   1623              const VRegister& vn,
   1624              const VRegister& vm,
   1625              int vm_index);
   1626 
   1627   // Signed long multiply-sub by scalar element (second part).
   1628   void smlsl2(const VRegister& vd,
   1629               const VRegister& vn,
   1630               const VRegister& vm,
   1631               int vm_index);
   1632 
   1633   // Unsigned long multiply-sub by scalar element.
   1634   void umlsl(const VRegister& vd,
   1635              const VRegister& vn,
   1636              const VRegister& vm,
   1637              int vm_index);
   1638 
   1639   // Unsigned long multiply-sub by scalar element (second part).
   1640   void umlsl2(const VRegister& vd,
   1641               const VRegister& vn,
   1642               const VRegister& vm,
   1643               int vm_index);
   1644 
   1645   // Signed long multiply by scalar element.
   1646   void smull(const VRegister& vd,
   1647              const VRegister& vn,
   1648              const VRegister& vm,
   1649              int vm_index);
   1650 
   1651   // Signed long multiply by scalar element (second part).
   1652   void smull2(const VRegister& vd,
   1653               const VRegister& vn,
   1654               const VRegister& vm,
   1655               int vm_index);
   1656 
   1657   // Unsigned long multiply by scalar element.
   1658   void umull(const VRegister& vd,
   1659              const VRegister& vn,
   1660              const VRegister& vm,
   1661              int vm_index);
   1662 
   1663   // Unsigned long multiply by scalar element (second part).
   1664   void umull2(const VRegister& vd,
   1665               const VRegister& vn,
   1666               const VRegister& vm,
   1667               int vm_index);
   1668 
   1669   // Signed saturating double long multiply by element.
   1670   void sqdmull(const VRegister& vd,
   1671                const VRegister& vn,
   1672                const VRegister& vm,
   1673                int vm_index);
   1674 
   1675   // Signed saturating double long multiply by element (second part).
   1676   void sqdmull2(const VRegister& vd,
   1677                 const VRegister& vn,
   1678                 const VRegister& vm,
   1679                 int vm_index);
   1680 
   1681   // Signed saturating doubling long multiply-add by element.
   1682   void sqdmlal(const VRegister& vd,
   1683                const VRegister& vn,
   1684                const VRegister& vm,
   1685                int vm_index);
   1686 
   1687   // Signed saturating doubling long multiply-add by element (second part).
   1688   void sqdmlal2(const VRegister& vd,
   1689                 const VRegister& vn,
   1690                 const VRegister& vm,
   1691                 int vm_index);
   1692 
   1693   // Signed saturating doubling long multiply-sub by element.
   1694   void sqdmlsl(const VRegister& vd,
   1695                const VRegister& vn,
   1696                const VRegister& vm,
   1697                int vm_index);
   1698 
   1699   // Signed saturating doubling long multiply-sub by element (second part).
   1700   void sqdmlsl2(const VRegister& vd,
   1701                 const VRegister& vn,
   1702                 const VRegister& vm,
   1703                 int vm_index);
   1704 
   1705   // Compare equal.
   1706   void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1707 
   1708   // Compare signed greater than or equal.
   1709   void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1710 
   1711   // Compare signed greater than.
   1712   void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1713 
   1714   // Compare unsigned higher.
   1715   void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1716 
   1717   // Compare unsigned higher or same.
   1718   void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1719 
   1720   // Compare bitwise test bits nonzero.
   1721   void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1722 
  // Compare equal to zero.
   1724   void cmeq(const VRegister& vd, const VRegister& vn, int value);
   1725 
   1726   // Compare signed greater than or equal to zero.
   1727   void cmge(const VRegister& vd, const VRegister& vn, int value);
   1728 
   1729   // Compare signed greater than zero.
   1730   void cmgt(const VRegister& vd, const VRegister& vn, int value);
   1731 
   1732   // Compare signed less than or equal to zero.
   1733   void cmle(const VRegister& vd, const VRegister& vn, int value);
   1734 
   1735   // Compare signed less than zero.
   1736   void cmlt(const VRegister& vd, const VRegister& vn, int value);
   1737 
   1738   // Signed shift left by register.
   1739   void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1740 
   1741   // Unsigned shift left by register.
   1742   void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1743 
   1744   // Signed saturating shift left by register.
   1745   void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1746 
   1747   // Unsigned saturating shift left by register.
   1748   void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1749 
   1750   // Signed rounding shift left by register.
   1751   void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1752 
   1753   // Unsigned rounding shift left by register.
   1754   void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1755 
   1756   // Signed saturating rounding shift left by register.
   1757   void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1758 
   1759   // Unsigned saturating rounding shift left by register.
   1760   void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1761 
   1762   // Bitwise and.
   1763   void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1764 
   1765   // Bitwise or.
   1766   void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1767 
   1768   // Bitwise or immediate.
   1769   void orr(const VRegister& vd, const int imm8, const int left_shift = 0);
   1770 
   1771   // Move register to register.
   1772   void mov(const VRegister& vd, const VRegister& vn);
   1773 
   1774   // Bitwise orn.
   1775   void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1776 
   1777   // Bitwise eor.
   1778   void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1779 
   1780   // Bit clear immediate.
   1781   void bic(const VRegister& vd, const int imm8, const int left_shift = 0);
   1782 
   1783   // Bit clear.
   1784   void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1785 
   1786   // Bitwise insert if false.
   1787   void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1788 
   1789   // Bitwise insert if true.
   1790   void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1791 
   1792   // Bitwise select.
   1793   void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1794 
   1795   // Polynomial multiply.
   1796   void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1797 
   1798   // Vector move immediate.
   1799   void movi(const VRegister& vd,
   1800             const uint64_t imm,
   1801             Shift shift = LSL,
   1802             const int shift_amount = 0);
   1803 
   1804   // Bitwise not.
   1805   void mvn(const VRegister& vd, const VRegister& vn);
   1806 
   1807   // Vector move inverted immediate.
   1808   void mvni(const VRegister& vd,
   1809             const int imm8,
   1810             Shift shift = LSL,
   1811             const int shift_amount = 0);
   1812 
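  // movi and mvni materialise per-lane constants; mvni applies the bitwise
  // inverse of the shifted immediate. For example (A64 syntax):
  //
  //   movi v0.4s, #0xff, lsl #8   // every 32-bit lane = 0x0000ff00
  //   mvni v1.4s, #0xff, lsl #8   // every 32-bit lane = 0xffff00ff
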
   1813   // Signed saturating accumulate of unsigned value.
   1814   void suqadd(const VRegister& vd, const VRegister& vn);
   1815 
   1816   // Unsigned saturating accumulate of signed value.
   1817   void usqadd(const VRegister& vd, const VRegister& vn);
   1818 
   1819   // Absolute value.
   1820   void abs(const VRegister& vd, const VRegister& vn);
   1821 
   1822   // Signed saturating absolute value.
   1823   void sqabs(const VRegister& vd, const VRegister& vn);
   1824 
   1825   // Negate.
   1826   void neg(const VRegister& vd, const VRegister& vn);
   1827 
   1828   // Signed saturating negate.
   1829   void sqneg(const VRegister& vd, const VRegister& vn);
   1830 
   1831   // Bitwise not.
   1832   void not_(const VRegister& vd, const VRegister& vn);
   1833 
   1834   // Extract narrow.
   1835   void xtn(const VRegister& vd, const VRegister& vn);
   1836 
   1837   // Extract narrow (second part).
   1838   void xtn2(const VRegister& vd, const VRegister& vn);
   1839 
   1840   // Signed saturating extract narrow.
   1841   void sqxtn(const VRegister& vd, const VRegister& vn);
   1842 
   1843   // Signed saturating extract narrow (second part).
   1844   void sqxtn2(const VRegister& vd, const VRegister& vn);
   1845 
   1846   // Unsigned saturating extract narrow.
   1847   void uqxtn(const VRegister& vd, const VRegister& vn);
   1848 
   1849   // Unsigned saturating extract narrow (second part).
   1850   void uqxtn2(const VRegister& vd, const VRegister& vn);
   1851 
   1852   // Signed saturating extract unsigned narrow.
   1853   void sqxtun(const VRegister& vd, const VRegister& vn);
   1854 
   1855   // Signed saturating extract unsigned narrow (second part).
   1856   void sqxtun2(const VRegister& vd, const VRegister& vn);
   1857 
   1858   // Extract vector from pair of vectors.
   1859   void ext(const VRegister& vd,
   1860            const VRegister& vn,
   1861            const VRegister& vm,
   1862            int index);
   1863 
   1864   // Duplicate vector element to vector or scalar.
   1865   void dup(const VRegister& vd, const VRegister& vn, int vn_index);
   1866 
   1867   // Move vector element to scalar.
   1868   void mov(const VRegister& vd, const VRegister& vn, int vn_index);
   1869 
   1870   // Duplicate general-purpose register to vector.
   1871   void dup(const VRegister& vd, const Register& rn);
   1872 
   1873   // Insert vector element from another vector element.
   1874   void ins(const VRegister& vd,
   1875            int vd_index,
   1876            const VRegister& vn,
   1877            int vn_index);
   1878 
   1879   // Move vector element to another vector element.
   1880   void mov(const VRegister& vd,
   1881            int vd_index,
   1882            const VRegister& vn,
   1883            int vn_index);
   1884 
   1885   // Insert vector element from general-purpose register.
   1886   void ins(const VRegister& vd, int vd_index, const Register& rn);
   1887 
   1888   // Move general-purpose register to a vector element.
   1889   void mov(const VRegister& vd, int vd_index, const Register& rn);
   1890 
   1891   // Unsigned move vector element to general-purpose register.
   1892   void umov(const Register& rd, const VRegister& vn, int vn_index);
   1893 
   1894   // Move vector element to general-purpose register.
   1895   void mov(const Register& rd, const VRegister& vn, int vn_index);
   1896 
   1897   // Signed move vector element to general-purpose register.
   1898   void smov(const Register& rd, const VRegister& vn, int vn_index);
   1899 
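  // umov and smov differ only in how the lane is extended into the
  // general-purpose register. For example (A64 syntax), with lane 2 of
  // v0.8h holding 0x8000:
  //
  //   umov w0, v0.h[2]   // w0 = 0x00008000 (zero-extended)
  //   smov w1, v0.h[2]   // w1 = 0xffff8000 (sign-extended)
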
   1900   // One-element structure load to one register.
   1901   void ld1(const VRegister& vt, const MemOperand& src);
   1902 
   1903   // One-element structure load to two registers.
   1904   void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
   1905 
   1906   // One-element structure load to three registers.
   1907   void ld1(const VRegister& vt,
   1908            const VRegister& vt2,
   1909            const VRegister& vt3,
   1910            const MemOperand& src);
   1911 
   1912   // One-element structure load to four registers.
   1913   void ld1(const VRegister& vt,
   1914            const VRegister& vt2,
   1915            const VRegister& vt3,
   1916            const VRegister& vt4,
   1917            const MemOperand& src);
   1918 
   1919   // One-element single structure load to one lane.
   1920   void ld1(const VRegister& vt, int lane, const MemOperand& src);
   1921 
   1922   // One-element single structure load to all lanes.
   1923   void ld1r(const VRegister& vt, const MemOperand& src);
   1924 
   1925   // Two-element structure load.
   1926   void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
   1927 
   1928   // Two-element single structure load to one lane.
   1929   void ld2(const VRegister& vt,
   1930            const VRegister& vt2,
   1931            int lane,
   1932            const MemOperand& src);
   1933 
   1934   // Two-element single structure load to all lanes.
   1935   void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
   1936 
   1937   // Three-element structure load.
   1938   void ld3(const VRegister& vt,
   1939            const VRegister& vt2,
   1940            const VRegister& vt3,
   1941            const MemOperand& src);
   1942 
   1943   // Three-element single structure load to one lane.
   1944   void ld3(const VRegister& vt,
   1945            const VRegister& vt2,
   1946            const VRegister& vt3,
   1947            int lane,
   1948            const MemOperand& src);
   1949 
   1950   // Three-element single structure load to all lanes.
   1951   void ld3r(const VRegister& vt,
   1952             const VRegister& vt2,
   1953             const VRegister& vt3,
   1954             const MemOperand& src);
   1955 
   1956   // Four-element structure load.
   1957   void ld4(const VRegister& vt,
   1958            const VRegister& vt2,
   1959            const VRegister& vt3,
   1960            const VRegister& vt4,
   1961            const MemOperand& src);
   1962 
   1963   // Four-element single structure load to one lane.
   1964   void ld4(const VRegister& vt,
   1965            const VRegister& vt2,
   1966            const VRegister& vt3,
   1967            const VRegister& vt4,
   1968            int lane,
   1969            const MemOperand& src);
   1970 
   1971   // Four-element single structure load to all lanes.
   1972   void ld4r(const VRegister& vt,
   1973             const VRegister& vt2,
   1974             const VRegister& vt3,
   1975             const VRegister& vt4,
   1976             const MemOperand& src);
   1977 
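  // The ld2/ld3/ld4 forms de-interleave structures as they load. For
  // example (A64 syntax), reading an array of {re, im} float pairs:
  //
  //   ld2 {v0.4s, v1.4s}, [x0]   // v0 = re0..re3, v1 = im0..im3
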
   1978   // Count leading sign bits.
   1979   void cls(const VRegister& vd, const VRegister& vn);
   1980 
   1981   // Count leading zero bits (vector).
   1982   void clz(const VRegister& vd, const VRegister& vn);
   1983 
   1984   // Population count per byte.
   1985   void cnt(const VRegister& vd, const VRegister& vn);
   1986 
   1987   // Reverse bit order.
   1988   void rbit(const VRegister& vd, const VRegister& vn);
   1989 
   1990   // Reverse elements in 16-bit halfwords.
   1991   void rev16(const VRegister& vd, const VRegister& vn);
   1992 
   1993   // Reverse elements in 32-bit words.
   1994   void rev32(const VRegister& vd, const VRegister& vn);
   1995 
   1996   // Reverse elements in 64-bit doublewords.
   1997   void rev64(const VRegister& vd, const VRegister& vn);
   1998 
   1999   // Unsigned reciprocal square root estimate.
   2000   void ursqrte(const VRegister& vd, const VRegister& vn);
   2001 
   2002   // Unsigned reciprocal estimate.
   2003   void urecpe(const VRegister& vd, const VRegister& vn);
   2004 
   2005   // Signed pairwise long add.
   2006   void saddlp(const VRegister& vd, const VRegister& vn);
   2007 
   2008   // Unsigned pairwise long add.
   2009   void uaddlp(const VRegister& vd, const VRegister& vn);
   2010 
   2011   // Signed pairwise long add and accumulate.
   2012   void sadalp(const VRegister& vd, const VRegister& vn);
   2013 
   2014   // Unsigned pairwise long add and accumulate.
   2015   void uadalp(const VRegister& vd, const VRegister& vn);
   2016 
   2017   // Shift left by immediate.
   2018   void shl(const VRegister& vd, const VRegister& vn, int shift);
   2019 
   2020   // Signed saturating shift left by immediate.
   2021   void sqshl(const VRegister& vd, const VRegister& vn, int shift);
   2022 
   2023   // Signed saturating shift left unsigned by immediate.
   2024   void sqshlu(const VRegister& vd, const VRegister& vn, int shift);
   2025 
   2026   // Unsigned saturating shift left by immediate.
   2027   void uqshl(const VRegister& vd, const VRegister& vn, int shift);
   2028 
   2029   // Signed shift left long by immediate.
   2030   void sshll(const VRegister& vd, const VRegister& vn, int shift);
   2031 
   2032   // Signed shift left long by immediate (second part).
   2033   void sshll2(const VRegister& vd, const VRegister& vn, int shift);
   2034 
   2035   // Signed extend long.
   2036   void sxtl(const VRegister& vd, const VRegister& vn);
   2037 
   2038   // Signed extend long (second part).
   2039   void sxtl2(const VRegister& vd, const VRegister& vn);
   2040 
   2041   // Unsigned shift left long by immediate.
   2042   void ushll(const VRegister& vd, const VRegister& vn, int shift);
   2043 
   2044   // Unsigned shift left long by immediate (second part).
   2045   void ushll2(const VRegister& vd, const VRegister& vn, int shift);
   2046 
   2047   // Shift left long by element size.
   2048   void shll(const VRegister& vd, const VRegister& vn, int shift);
   2049 
   2050   // Shift left long by element size (second part).
   2051   void shll2(const VRegister& vd, const VRegister& vn, int shift);
   2052 
   2053   // Unsigned extend long.
   2054   void uxtl(const VRegister& vd, const VRegister& vn);
   2055 
   2056   // Unsigned extend long (second part).
   2057   void uxtl2(const VRegister& vd, const VRegister& vn);
   2058 
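  // sxtl/uxtl (and their "2" forms) are aliases of sshll/ushll with a zero
  // shift; they simply widen each element. For example (A64 syntax):
  //
  //   uxtl v0.8h, v1.8b   // zero-extend eight bytes to halfwords
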
   2059   // Shift left by immediate and insert.
   2060   void sli(const VRegister& vd, const VRegister& vn, int shift);
   2061 
   2062   // Shift right by immediate and insert.
   2063   void sri(const VRegister& vd, const VRegister& vn, int shift);
   2064 
   2065   // Signed maximum.
   2066   void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2067 
   2068   // Signed pairwise maximum.
   2069   void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2070 
   2071   // Add across vector.
   2072   void addv(const VRegister& vd, const VRegister& vn);
   2073 
   2074   // Signed add long across vector.
   2075   void saddlv(const VRegister& vd, const VRegister& vn);
   2076 
   2077   // Unsigned add long across vector.
   2078   void uaddlv(const VRegister& vd, const VRegister& vn);
   2079 
   2080   // FP maximum number across vector.
   2081   void fmaxnmv(const VRegister& vd, const VRegister& vn);
   2082 
   2083   // FP maximum across vector.
   2084   void fmaxv(const VRegister& vd, const VRegister& vn);
   2085 
   2086   // FP minimum number across vector.
   2087   void fminnmv(const VRegister& vd, const VRegister& vn);
   2088 
   2089   // FP minimum across vector.
   2090   void fminv(const VRegister& vd, const VRegister& vn);
   2091 
   2092   // Signed maximum across vector.
   2093   void smaxv(const VRegister& vd, const VRegister& vn);
   2094 
   2095   // Signed minimum.
   2096   void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2097 
  // Signed pairwise minimum.
   2099   void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2100 
   2101   // Signed minimum across vector.
   2102   void sminv(const VRegister& vd, const VRegister& vn);
   2103 
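  // The across-vector forms reduce all lanes to a single scalar result.
  // For example (A64 syntax):
  //
  //   addv  h0, v1.8h    // h0 = sum of the eight halfword lanes of v1
  //   sminv b0, v1.16b   // b0 = smallest signed byte in v1
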
  // One-element structure store from one register.
  void st1(const VRegister& vt, const MemOperand& dst);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& dst);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& dst);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& dst);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& dst);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& dst);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& dst);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& dst);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& dst);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& dst);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& dst);
   2162 
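  // The stN forms are the interleaving counterparts of ldN: st2 with
  // {v0.4s, v1.4s} writes the lanes back as v0[0], v1[0], v0[1], v1[1],
  // and so on.
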
   2163   // Unsigned add long.
   2164   void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2165 
   2166   // Unsigned add long (second part).
   2167   void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2168 
   2169   // Unsigned add wide.
   2170   void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2171 
   2172   // Unsigned add wide (second part).
   2173   void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2174 
   2175   // Signed add long.
   2176   void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2177 
   2178   // Signed add long (second part).
   2179   void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2180 
   2181   // Signed add wide.
   2182   void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2183 
   2184   // Signed add wide (second part).
   2185   void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2186 
   2187   // Unsigned subtract long.
   2188   void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2189 
   2190   // Unsigned subtract long (second part).
   2191   void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2192 
   2193   // Unsigned subtract wide.
   2194   void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2195 
   2196   // Unsigned subtract wide (second part).
   2197   void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2198 
   2199   // Signed subtract long.
   2200   void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2201 
   2202   // Signed subtract long (second part).
   2203   void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2204 
  // Signed subtract wide.
   2206   void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2207 
  // Signed subtract wide (second part).
   2209   void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2210 
   2211   // Unsigned maximum.
   2212   void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2213 
   2214   // Unsigned pairwise maximum.
   2215   void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2216 
   2217   // Unsigned maximum across vector.
   2218   void umaxv(const VRegister& vd, const VRegister& vn);
   2219 
   2220   // Unsigned minimum.
   2221   void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2222 
   2223   // Unsigned pairwise minimum.
   2224   void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2225 
   2226   // Unsigned minimum across vector.
   2227   void uminv(const VRegister& vd, const VRegister& vn);
   2228 
   2229   // Transpose vectors (primary).
   2230   void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2231 
   2232   // Transpose vectors (secondary).
   2233   void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2234 
   2235   // Unzip vectors (primary).
   2236   void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2237 
   2238   // Unzip vectors (secondary).
   2239   void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2240 
   2241   // Zip vectors (primary).
   2242   void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2243 
   2244   // Zip vectors (secondary).
   2245   void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2246 
   2247   // Signed shift right by immediate.
   2248   void sshr(const VRegister& vd, const VRegister& vn, int shift);
   2249 
   2250   // Unsigned shift right by immediate.
   2251   void ushr(const VRegister& vd, const VRegister& vn, int shift);
   2252 
   2253   // Signed rounding shift right by immediate.
   2254   void srshr(const VRegister& vd, const VRegister& vn, int shift);
   2255 
   2256   // Unsigned rounding shift right by immediate.
   2257   void urshr(const VRegister& vd, const VRegister& vn, int shift);
   2258 
   2259   // Signed shift right by immediate and accumulate.
   2260   void ssra(const VRegister& vd, const VRegister& vn, int shift);
   2261 
   2262   // Unsigned shift right by immediate and accumulate.
   2263   void usra(const VRegister& vd, const VRegister& vn, int shift);
   2264 
   2265   // Signed rounding shift right by immediate and accumulate.
   2266   void srsra(const VRegister& vd, const VRegister& vn, int shift);
   2267 
   2268   // Unsigned rounding shift right by immediate and accumulate.
   2269   void ursra(const VRegister& vd, const VRegister& vn, int shift);
   2270 
   2271   // Shift right narrow by immediate.
   2272   void shrn(const VRegister& vd, const VRegister& vn, int shift);
   2273 
   2274   // Shift right narrow by immediate (second part).
   2275   void shrn2(const VRegister& vd, const VRegister& vn, int shift);
   2276 
   2277   // Rounding shift right narrow by immediate.
   2278   void rshrn(const VRegister& vd, const VRegister& vn, int shift);
   2279 
   2280   // Rounding shift right narrow by immediate (second part).
   2281   void rshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2282 
   2283   // Unsigned saturating shift right narrow by immediate.
   2284   void uqshrn(const VRegister& vd, const VRegister& vn, int shift);
   2285 
   2286   // Unsigned saturating shift right narrow by immediate (second part).
   2287   void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2288 
   2289   // Unsigned saturating rounding shift right narrow by immediate.
   2290   void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);
   2291 
   2292   // Unsigned saturating rounding shift right narrow by immediate (second part).
   2293   void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2294 
   2295   // Signed saturating shift right narrow by immediate.
   2296   void sqshrn(const VRegister& vd, const VRegister& vn, int shift);
   2297 
   2298   // Signed saturating shift right narrow by immediate (second part).
   2299   void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2300 
  // Signed saturating rounding shift right narrow by immediate.
   2302   void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);
   2303 
  // Signed saturating rounding shift right narrow by immediate (second part).
   2305   void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
   2306 
   2307   // Signed saturating shift right unsigned narrow by immediate.
   2308   void sqshrun(const VRegister& vd, const VRegister& vn, int shift);
   2309 
   2310   // Signed saturating shift right unsigned narrow by immediate (second part).
   2311   void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);
   2312 
  // Signed saturating rounding shift right unsigned narrow by immediate.
   2314   void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);
   2315 
  // Signed saturating rounding shift right unsigned narrow by immediate
  // (second part).
   2317   void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
   2318 
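  // The narrowing shifts halve the element size after shifting, with the
  // "2" forms filling the upper half of the destination. For example (A64
  // syntax), packing 16-bit intermediates into saturated unsigned bytes:
  //
  //   sqshrun  v0.8b,  v1.8h, #4   // results 0-7
  //   sqshrun2 v0.16b, v2.8h, #4   // results 8-15, same destination
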
   2319   // FP reciprocal step.
   2320   void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2321 
   2322   // FP reciprocal estimate.
   2323   void frecpe(const VRegister& vd, const VRegister& vn);
   2324 
   2325   // FP reciprocal square root estimate.
   2326   void frsqrte(const VRegister& vd, const VRegister& vn);
   2327 
   2328   // FP reciprocal square root step.
   2329   void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2330 
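  // The estimate and step instructions are designed to be combined into
  // Newton-Raphson iterations. One refinement step for 1/x (A64 syntax):
  //
  //   frecpe v1.4s, v0.4s          // initial estimate of 1/v0
  //   frecps v2.4s, v0.4s, v1.4s   // step: 2.0 - (v0 * v1)
  //   fmul   v1.4s, v1.4s, v2.4s   // refined estimate
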
   2331   // Signed absolute difference and accumulate long.
   2332   void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2333 
   2334   // Signed absolute difference and accumulate long (second part).
   2335   void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2336 
   2337   // Unsigned absolute difference and accumulate long.
   2338   void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2339 
   2340   // Unsigned absolute difference and accumulate long (second part).
   2341   void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2342 
   2343   // Signed absolute difference long.
   2344   void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2345 
   2346   // Signed absolute difference long (second part).
   2347   void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2348 
   2349   // Unsigned absolute difference long.
   2350   void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2351 
   2352   // Unsigned absolute difference long (second part).
   2353   void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2354 
   2355   // Polynomial multiply long.
   2356   void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2357 
   2358   // Polynomial multiply long (second part).
   2359   void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2360 
   2361   // Signed long multiply-add.
   2362   void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2363 
   2364   // Signed long multiply-add (second part).
   2365   void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2366 
   2367   // Unsigned long multiply-add.
   2368   void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2369 
   2370   // Unsigned long multiply-add (second part).
   2371   void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2372 
   2373   // Signed long multiply-sub.
   2374   void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2375 
   2376   // Signed long multiply-sub (second part).
   2377   void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2378 
   2379   // Unsigned long multiply-sub.
   2380   void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2381 
   2382   // Unsigned long multiply-sub (second part).
   2383   void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2384 
   2385   // Signed long multiply.
   2386   void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2387 
   2388   // Signed long multiply (second part).
   2389   void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2390 
   2391   // Signed saturating doubling long multiply-add.
   2392   void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2393 
   2394   // Signed saturating doubling long multiply-add (second part).
   2395   void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2396 
   2397   // Signed saturating doubling long multiply-subtract.
   2398   void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2399 
   2400   // Signed saturating doubling long multiply-subtract (second part).
   2401   void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2402 
   2403   // Signed saturating doubling long multiply.
   2404   void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2405 
   2406   // Signed saturating doubling long multiply (second part).
   2407   void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2408 
   2409   // Signed saturating doubling multiply returning high half.
   2410   void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2411 
   2412   // Signed saturating rounding doubling multiply returning high half.
   2413   void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2414 
   2415   // Signed saturating doubling multiply element returning high half.
   2416   void sqdmulh(const VRegister& vd,
   2417                const VRegister& vn,
   2418                const VRegister& vm,
   2419                int vm_index);
   2420 
   2421   // Signed saturating rounding doubling multiply element returning high half.
   2422   void sqrdmulh(const VRegister& vd,
   2423                 const VRegister& vn,
   2424                 const VRegister& vm,
   2425                 int vm_index);
   2426 
  // Unsigned long multiply.
   2428   void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2429 
   2430   // Unsigned long multiply (second part).
   2431   void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2432 
   2433   // Add narrow returning high half.
   2434   void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2435 
   2436   // Add narrow returning high half (second part).
   2437   void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2438 
   2439   // Rounding add narrow returning high half.
   2440   void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2441 
   2442   // Rounding add narrow returning high half (second part).
   2443   void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2444 
   2445   // Subtract narrow returning high half.
   2446   void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2447 
   2448   // Subtract narrow returning high half (second part).
   2449   void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2450 
   2451   // Rounding subtract narrow returning high half.
   2452   void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2453 
   2454   // Rounding subtract narrow returning high half (second part).
   2455   void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2456 
   2457   // FP vector multiply accumulate.
   2458   void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2459 
   2460   // FP vector multiply subtract.
   2461   void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2462 
   2463   // FP vector multiply extended.
   2464   void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2465 
   2466   // FP absolute greater than or equal.
   2467   void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2468 
   2469   // FP absolute greater than.
   2470   void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2471 
   2472   // FP multiply by element.
   2473   void fmul(const VRegister& vd,
   2474             const VRegister& vn,
   2475             const VRegister& vm,
   2476             int vm_index);
   2477 
   2478   // FP fused multiply-add to accumulator by element.
   2479   void fmla(const VRegister& vd,
   2480             const VRegister& vn,
   2481             const VRegister& vm,
   2482             int vm_index);
   2483 
   2484   // FP fused multiply-sub from accumulator by element.
   2485   void fmls(const VRegister& vd,
   2486             const VRegister& vn,
   2487             const VRegister& vm,
   2488             int vm_index);
   2489 
   2490   // FP multiply extended by element.
   2491   void fmulx(const VRegister& vd,
   2492              const VRegister& vn,
   2493              const VRegister& vm,
   2494              int vm_index);
   2495 
   2496   // FP compare equal.
   2497   void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2498 
   2499   // FP greater than.
   2500   void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2501 
   2502   // FP greater than or equal.
   2503   void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2504 
   2505   // FP compare equal to zero.
   2506   void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
   2507 
   2508   // FP greater than zero.
   2509   void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
   2510 
   2511   // FP greater than or equal to zero.
   2512   void fcmge(const VRegister& vd, const VRegister& vn, double imm);
   2513 
   2514   // FP less than or equal to zero.
   2515   void fcmle(const VRegister& vd, const VRegister& vn, double imm);
   2516 
   2517   // FP less than to zero.
   2518   void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
   2519 
   2520   // FP absolute difference.
   2521   void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2522 
   2523   // FP pairwise add vector.
   2524   void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2525 
   2526   // FP pairwise add scalar.
   2527   void faddp(const VRegister& vd, const VRegister& vn);
   2528 
   2529   // FP pairwise maximum vector.
   2530   void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2531 
   2532   // FP pairwise maximum scalar.
   2533   void fmaxp(const VRegister& vd, const VRegister& vn);
   2534 
   2535   // FP pairwise minimum vector.
   2536   void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2537 
   2538   // FP pairwise minimum scalar.
   2539   void fminp(const VRegister& vd, const VRegister& vn);
   2540 
   2541   // FP pairwise maximum number vector.
   2542   void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2543 
   2544   // FP pairwise maximum number scalar.
   2545   void fmaxnmp(const VRegister& vd, const VRegister& vn);
   2546 
   2547   // FP pairwise minimum number vector.
   2548   void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   2549 
   2550   // FP pairwise minimum number scalar.
   2551   void fminnmp(const VRegister& vd, const VRegister& vn);
   2552 
   2553   // Emit generic instructions.
   2554   // Emit raw instructions into the instruction stream.
   2555   void dci(Instr raw_inst) { Emit(raw_inst); }
   2556 
   2557   // Emit 32 bits of data into the instruction stream.
   2558   void dc32(uint32_t data) { dc(data); }
   2559 
   2560   // Emit 64 bits of data into the instruction stream.
   2561   void dc64(uint64_t data) { dc(data); }
   2562 
   2563   // Emit data in the instruction stream.
   2564   template <typename T>
   2565   void dc(T data) {
   2566     VIXL_ASSERT(AllowAssembler());
   2567     GetBuffer()->Emit<T>(data);
   2568   }
   2569 
   2570   // Copy a string into the instruction stream, including the terminating NULL
   2571   // character. The instruction pointer is then aligned correctly for
   2572   // subsequent instructions.
   2573   void EmitString(const char* string) {
   2574     VIXL_ASSERT(string != NULL);
   2575     VIXL_ASSERT(AllowAssembler());
   2576 
   2577     GetBuffer()->EmitString(string);
   2578     GetBuffer()->Align();
   2579   }
   2580 
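  // A sketch of raw data emission, assuming an Assembler `masm` used where
  // direct emission is permitted:
  //
  //   masm.dc32(0xdeadbeef);        // Four bytes of literal data.
  //   masm.EmitString("build-id");  // String plus NUL, then realigned so
  //                                 // the next instruction is well placed.
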
   2581   // Code generation helpers.
   2582 
   2583   // Register encoding.
   2584   static Instr Rd(CPURegister rd) {
   2585     VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode);
   2586     return rd.GetCode() << Rd_offset;
   2587   }
   2588 
   2589   static Instr Rn(CPURegister rn) {
   2590     VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode);
   2591     return rn.GetCode() << Rn_offset;
   2592   }
   2593 
   2594   static Instr Rm(CPURegister rm) {
   2595     VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
   2596     return rm.GetCode() << Rm_offset;
   2597   }
   2598 
   2599   static Instr RmNot31(CPURegister rm) {
   2600     VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
   2601     VIXL_ASSERT(!rm.IsZero());
   2602     return Rm(rm);
   2603   }
   2604 
   2605   static Instr Ra(CPURegister ra) {
   2606     VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode);
   2607     return ra.GetCode() << Ra_offset;
   2608   }
   2609 
   2610   static Instr Rt(CPURegister rt) {
   2611     VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode);
   2612     return rt.GetCode() << Rt_offset;
   2613   }
   2614 
   2615   static Instr Rt2(CPURegister rt2) {
   2616     VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode);
   2617     return rt2.GetCode() << Rt2_offset;
   2618   }
   2619 
   2620   static Instr Rs(CPURegister rs) {
   2621     VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode);
   2622     return rs.GetCode() << Rs_offset;
   2623   }
   2624 
   2625   // These encoding functions allow the stack pointer to be encoded, and
   2626   // disallow the zero register.
   2627   static Instr RdSP(Register rd) {
   2628     VIXL_ASSERT(!rd.IsZero());
   2629     return (rd.GetCode() & kRegCodeMask) << Rd_offset;
   2630   }
   2631 
   2632   static Instr RnSP(Register rn) {
   2633     VIXL_ASSERT(!rn.IsZero());
   2634     return (rn.GetCode() & kRegCodeMask) << Rn_offset;
   2635   }
   2636 
   2637   // Flags encoding.
   2638   static Instr Flags(FlagsUpdate S) {
   2639     if (S == SetFlags) {
   2640       return 1 << FlagsUpdate_offset;
   2641     } else if (S == LeaveFlags) {
   2642       return 0 << FlagsUpdate_offset;
   2643     }
   2644     VIXL_UNREACHABLE();
   2645     return 0;
   2646   }
   2647 
   2648   static Instr Cond(Condition cond) { return cond << Condition_offset; }
   2649 
   2650   // PC-relative address encoding.
   2651   static Instr ImmPCRelAddress(int64_t imm21) {
   2652     VIXL_ASSERT(IsInt21(imm21));
   2653     Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
   2654     Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
   2655     Instr immlo = imm << ImmPCRelLo_offset;
   2656     return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
   2657   }
   2658 
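  // For example, imm21 = 0x1005 splits into immlo = 0b01 (bits [1:0]) and
  // immhi = 0x401 (bits [20:2]), matching the adr/adrp operand encoding.
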
   2659   // Branch encoding.
   2660   static Instr ImmUncondBranch(int64_t imm26) {
   2661     VIXL_ASSERT(IsInt26(imm26));
   2662     return TruncateToUint26(imm26) << ImmUncondBranch_offset;
   2663   }
   2664 
   2665   static Instr ImmCondBranch(int64_t imm19) {
   2666     VIXL_ASSERT(IsInt19(imm19));
   2667     return TruncateToUint19(imm19) << ImmCondBranch_offset;
   2668   }
   2669 
   2670   static Instr ImmCmpBranch(int64_t imm19) {
   2671     VIXL_ASSERT(IsInt19(imm19));
   2672     return TruncateToUint19(imm19) << ImmCmpBranch_offset;
   2673   }
   2674 
   2675   static Instr ImmTestBranch(int64_t imm14) {
   2676     VIXL_ASSERT(IsInt14(imm14));
   2677     return TruncateToUint14(imm14) << ImmTestBranch_offset;
   2678   }
   2679 
   2680   static Instr ImmTestBranchBit(unsigned bit_pos) {
   2681     VIXL_ASSERT(IsUint6(bit_pos));
   2682     // Subtract five from the shift offset, as we need bit 5 from bit_pos.
   2683     unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
   2684     unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
   2685     b5 &= ImmTestBranchBit5_mask;
   2686     b40 &= ImmTestBranchBit40_mask;
   2687     return b5 | b40;
   2688   }
   2689 
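  // For example, bit_pos = 37 (0b100101) encodes b5 = 1 and b40 = 0b00101;
  // tbz/tbnz split the tested bit number across two fields, with b5
  // doubling as the register-width bit.
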
   2690   // Data Processing encoding.
   2691   static Instr SF(Register rd) {
   2692     return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
   2693   }
   2694 
   2695   static Instr ImmAddSub(int imm) {
   2696     VIXL_ASSERT(IsImmAddSub(imm));
   2697     if (IsUint12(imm)) {  // No shift required.
   2698       imm <<= ImmAddSub_offset;
   2699     } else {
   2700       imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
   2701     }
   2702     return imm;
   2703   }
   2704 
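  // For example, ImmAddSub(0xfff) encodes an unshifted 12-bit immediate,
  // while ImmAddSub(0x1000) encodes imm12 = 1 with the LSL #12 shift bit
  // set.
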
   2705   static Instr ImmS(unsigned imms, unsigned reg_size) {
   2706     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
   2707                 ((reg_size == kWRegSize) && IsUint5(imms)));
   2708     USE(reg_size);
   2709     return imms << ImmS_offset;
   2710   }
   2711 
   2712   static Instr ImmR(unsigned immr, unsigned reg_size) {
   2713     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
   2714                 ((reg_size == kWRegSize) && IsUint5(immr)));
   2715     USE(reg_size);
   2716     VIXL_ASSERT(IsUint6(immr));
   2717     return immr << ImmR_offset;
   2718   }
   2719 
   2720   static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
   2721     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
   2722     VIXL_ASSERT(IsUint6(imms));
   2723     VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
   2724     USE(reg_size);
   2725     return imms << ImmSetBits_offset;
   2726   }
   2727 
   2728   static Instr ImmRotate(unsigned immr, unsigned reg_size) {
   2729     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
   2730     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
   2731                 ((reg_size == kWRegSize) && IsUint5(immr)));
   2732     USE(reg_size);
   2733     return immr << ImmRotate_offset;
   2734   }
   2735 
   2736   static Instr ImmLLiteral(int64_t imm19) {
   2737     VIXL_ASSERT(IsInt19(imm19));
   2738     return TruncateToUint19(imm19) << ImmLLiteral_offset;
   2739   }
   2740 
   2741   static Instr BitN(unsigned bitn, unsigned reg_size) {
   2742     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
   2743     VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
   2744     USE(reg_size);
   2745     return bitn << BitN_offset;
   2746   }
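
  // Illustrative sketch (not from the original source): a bitmask immediate
  // such as 0x00ff00ff00ff00ff travels as the N:immr:imms triple. IsImmLogical
  // (declared below) derives the fields, which are then packed roughly as:
  //   unsigned n, imm_s, imm_r;
  //   if (IsImmLogical(0x00ff00ff00ff00ff, kXRegSize, &n, &imm_s, &imm_r)) {
  //     Instr bits = BitN(n, kXRegSize) | ImmRotate(imm_r, kXRegSize) |
  //                  ImmSetBits(imm_s, kXRegSize);
  //   }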

  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(IsUint6(amount));
    return amount << ImmDPShift_offset;
  }

  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }

  // MemOperand offset encoding.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  static Instr ImmLSPair(int64_t imm7, unsigned access_size) {
    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size));
    int64_t scaled_imm7 = imm7 / (1 << access_size);
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }
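
  // Illustrative note (not from the original source): pair offsets are stored
  // scaled by the access size, so for `ldp x0, x1, [sp, #16]` (8-byte
  // accesses, access_size == 3) the field holds 16 / 8 == 2, and valid
  // offsets are multiples of 8 in [-512, 504]:
  //   Instr off = ImmLSPair(16, 3);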

  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }

  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  static Instr ImmSystemRegister(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmSystemRegister_offset;
  }

  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }
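
  // Illustrative note (not from the original source): movz/movk/movn build
  // values from 16-bit chunks at bit positions 0, 16, 32 or 48; the shift is
  // stored divided by 16, so `movz x0, #0xbeef, lsl #32` would use:
  //   Instr bits = ImmMoveWide(0xbeef) | ShiftMoveWide(32 / 16);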

  // FP Immediates.
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);

  // FP register type.
  static Instr FPType(FPRegister fd) { return fd.Is64Bits() ? FP64 : FP32; }

  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(IsUint6(scale));
    return scale << FPScale_offset;
  }

  // Immediate field checking helpers.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);

  // Instruction bits for vector format in data processing operations.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }
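
  // Illustrative note (not from the original source, assuming the V8B()/
  // V16B() helpers from operands-aarch64.h): the same lane count maps to
  // different formats depending on the total register width:
  //   VFormat(v0.V8B());   // 64-bit register, 8 lanes   -> NEON_8B
  //   VFormat(v0.V16B());  // 128-bit register, 16 lanes -> NEON_16B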

  // Instruction bits for vector format in floating point data processing
  // operations.
  static Instr FPFormat(VRegister vd) {
    if (vd.GetLanes() == 1) {
      // Floating point scalar formats.
      VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
      return vd.Is64Bits() ? FP64 : FP32;
    }

    // Two lane floating point vector formats.
    if (vd.GetLanes() == 2) {
      VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
      return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
    }

    // Four lane floating point vector format.
    VIXL_ASSERT((vd.GetLanes() == 4) && vd.Is128Bits());
    return NEON_FP_4S;
  }

  // Instruction bits for vector format in load and store operations.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for scalar format in data processing operations.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.GetLanes() == 1);
    switch (vd.GetSizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        return 0xffffffff;
    }
  }

  static Instr ImmNEONHLM(int index, int num_bits) {
    int h, l, m;
    if (num_bits == 3) {
      VIXL_ASSERT(IsUint3(index));
      h = (index >> 2) & 1;
      l = (index >> 1) & 1;
      m = (index >> 0) & 1;
    } else if (num_bits == 2) {
      VIXL_ASSERT(IsUint2(index));
      h = (index >> 1) & 1;
      l = (index >> 0) & 1;
      m = 0;
    } else {
      VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
      h = (index >> 0) & 1;
      l = 0;
      m = 0;
    }
    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
  }
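
  // Illustrative note (not from the original source): by-element operations
  // spread the lane index across the H:L:M bits, and the number of index bits
  // depends on the lane size; index 5 with three bits is 0b101, so:
  //   Instr hlm = ImmNEONHLM(5, 3);  // H = 1, L = 0, M = 1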

  static Instr ImmNEONExt(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << ImmNEONExt_offset;
  }

  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }
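
  // Illustrative note (not from the original source): imm5 marks the lane
  // size with a trailing set bit and stores the index above it; lane 3 of a
  // 4S vector (s == 2) gives imm5 == (3 << 3) | (1 << 2) == 0b11100:
  //   Instr bits = ImmNEON5(VFormat(v0.V4S()), 3);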

  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(IsUint8(imm8));
    Instr instr;
    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }

  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }

  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }

  // Size in bytes of the code generated between the bound label and the
  // current position.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }
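
  // Illustrative usage sketch (not from the original source), assuming an
  // Assembler instance named `masm`:
  //   Label start;
  //   masm.bind(&start);
  //   masm.nop();
  //   size_t bytes = masm.GetSizeOfCodeGeneratedSince(&start);  // 4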

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }

  bool AllowPageOffsetDependentCode() const {
    return (GetPic() == PageOffsetDependentCode) ||
           (GetPic() == PositionDependentCode);
  }

  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }

 protected:
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // TODO(all): The third parameter should be passed by reference, but gcc
  // 4.8.2 then reports a bogus uninitialised warning.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);


  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);


 private:
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);
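
  // Illustrative note (not from the original source): element_shift selects
  // the offset granularity, so the helpers above are presumably thin wrappers
  // along the lines of:
  //   ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label) {
  //     return LinkAndGetOffsetTo<kInstructionSizeLog2>(label);
  //   }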

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);

  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }
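
  // Illustrative note (not from the original source): the public instruction
  // methods OR the encoding helpers above into one 32-bit word before calling
  // Emit, roughly like this (`kAddSubImmediateFixedBits` is a hypothetical
  // stand-in for the instruction-specific constant):
  //   Emit(kAddSubImmediateFixedBits | SF(rd) | Rd(rd) | RnSP(rn) |
  //        ImmAddSub(imm));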

  PositionIndependentCodeOption pic_;
};


template <typename T>
void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
  return UpdateValue(new_value,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


template <typename T>
void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
  return UpdateValue(high64,
                     low64,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


}  // namespace aarch64

// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file. Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS

}  // namespace vixl

#endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_