// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
#define VIXL_AARCH64_ASSEMBLER_AARCH64_H_

#include "../assembler-base-vixl.h"
#include "../code-generation-scopes-vixl.h"
#include "../globals-vixl.h"
#include "../invalset-vixl.h"
#include "../utils-vixl.h"

#include "operands-aarch64.h"

namespace vixl {
namespace aarch64 {

class LabelTestHelper;  // Forward declaration.


// A position in the code buffer that branches and address-calculating
// instructions can target. A Label is either unbound (`location_` is
// kLocationUnbound) or bound to a non-negative buffer offset. While unbound,
// it records the offsets of every instruction that links to it so those
// instructions can be patched when the label is finally bound.
class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  // A label is bound once it has been given a location in the buffer.
  bool IsBound() const { return location_ >= 0; }
  // A label is linked while at least one instruction still refers to it.
  bool IsLinked() const { return !links_.empty(); }

  // Return the offset of the label in the code buffer. Only valid once bound.
  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  // Tuning parameters for the underlying InvalSet of links.
  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  // Attach the label to `location` in the code buffer.
  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  // Record that the instruction at `instruction` refers to this label.
  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, ie. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location (kLocationUnbound until the label is bound).
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

  // It is not safe to copy labels, so disable the copy constructor and operator
  // by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};


class Assembler;
class LiteralPool;

// A literal is a 32-bit or 64-bit piece of data stored in the instruction
// stream and loaded through a pc relative load. The same literal can be
// referred to by multiple instructions but a literal can only reside at one
// place in memory. A literal can be used by a load before or after being
// placed in memory.
//
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//   1) the literal is placed, the offset (stored as offset + 1) is used to
//      resolve any subsequent load using the literal.
//   2) the literal is not placed and offset is the offset of the last load
//      using the literal (stored as -offset -1).
//      If multiple loads refer to this literal then the last load holds the
//      offset of the preceding load and all loads form a chain. Once the
//      literal is placed all the loads in the chain are resolved and future
//      loads fall back to possibility 1.
class RawLiteral {
 public:
  // Controls who is responsible for deleting the literal object.
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  // Return the size of the literal in bytes: 4 (W/S), 8 (X/D) or 16 (Q).
  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  // See the offset-encoding scheme documented above the class: negative
  // offset_ means "used but not placed", positive means "placed".
  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  // Return the buffer offset at which the literal was placed.
  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    // Stored biased by +1 so that 0 can mean "neither used nor placed".
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  // Record the buffer offset at which the literal is placed.
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  // Return the offset of the most recent load of this (unplaced) literal.
  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  // Record the offset of the most recent load of this (unplaced) literal.
  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  // Size of the literal in bytes.
  size_t size_;
  // Encoded placement state; see the comment above the class.
  ptrdiff_t offset_;
  // Raw value; only 128-bit (Q) literals use high64_.
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};


// Typed literal. `T` determines the size: a value type of up to 8 bytes, or a
// pair of 8-byte halves for a 128-bit (Q) literal.
template <typename T>
class Literal : public RawLiteral {
 public:
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  // 128-bit literal, built from two 64-bit halves.
  Literal(T high64,
          T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}

  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows
  // its offset from there. This also allows patching the value after the code
  // has been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    // memcpy rather than a cast: preserves the bit pattern of any T (including
    // floating-point types) without aliasing issues.
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  // 128-bit variant: update both halves.
  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  // Convenience overloads that obtain the code buffer from the assembler;
  // defined elsewhere.
  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  // Patch the literal's slot in the code buffer with the current raw value.
  // Placement offsets are word-aligned (asserted in SetOffset).
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (GetSize()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
            GetRawValue32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
            GetRawValue64();
        break;
      default:
        VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
        *base_address = GetRawValue128Low64();
        *(base_address + 1) = GetRawValue128High64();
    }
  }
};


// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};


// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};


// Assembler.
// Emits raw AArch64 instructions into a code buffer. One member function per
// instruction mnemonic; label binding and literal placement are handled here
// so that recorded offsets stay consistent with the buffer.
class Assembler : public vixl::internal::AssemblerBase {
 public:
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic) {}
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity), pic_(pic) {}
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity), pic_(pic) {}

  // Upon destruction, the code will assert that one of the following is true:
  // * The Assembler object has not been used.
  // * Nothing has been emitted since the last Reset() call.
  // * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}

  // System functions.

  // Start generating code from the beginning of the buffer, discarding any code
  // and data that has already been emitted into the buffer.
  void Reset();

  // Label.
  // Bind a label to the current PC.
  void bind(Label* label);

  // Bind a label to a specified offset from the start of the buffer.
  void BindToOffset(Label* label, ptrdiff_t offset);

  // Place a literal at the current PC.
  void place(RawLiteral* literal);

  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }

  // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    // T must be wide enough to hold a pointer.
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }

  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
  VIXL_DEPRECATED("GetInstructionAt",
                  Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
    return GetInstructionAt(instruction_offset);
  }

  // Return the buffer offset of `instruction` (which must point into the
  // buffer).
  ptrdiff_t GetInstructionOffset(Instruction* instruction) {
    // Pointer arithmetic below relies on Instruction being byte-sized.
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset =
        instruction - GetBuffer()->GetStartAddress<Instruction*>();
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
    return offset;
  }
  VIXL_DEPRECATED("GetInstructionOffset",
                  ptrdiff_t InstructionOffset(Instruction* instruction)) {
    return GetInstructionOffset(instruction);
  }

  // Instruction set functions.

  // Branch / Jump instructions.
  // Branch to register.
  void br(const Register& xn);

  // Branch with link to register.
  void blr(const Register& xn);

  // Branch to register with return hint.
  void ret(const Register& xn = lr);

  // Unconditional branch to label.
  void b(Label* label);

  // Conditional branch to label.
  void b(Label* label, Condition cond);

  // Unconditional branch to PC offset.
  void b(int64_t imm26);

  // Conditional branch to PC offset.
  void b(int64_t imm19, Condition cond);

  // Branch with link to label.
  void bl(Label* label);

  // Branch with link to PC offset.
  void bl(int64_t imm26);

  // Compare and branch to label if zero.
  void cbz(const Register& rt, Label* label);

  // Compare and branch to PC offset if zero.
  void cbz(const Register& rt, int64_t imm19);

  // Compare and branch to label if not zero.
  void cbnz(const Register& rt, Label* label);

  // Compare and branch to PC offset if not zero.
  void cbnz(const Register& rt, int64_t imm19);

  // Table lookup from one register.
  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup from two registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup from three registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup from four registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Table lookup extension from one register.
  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup extension from two registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup extension from three registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup extension from four registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Test bit and branch to label if zero.
  void tbz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if zero.
  void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Test bit and branch to label if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Address calculation instructions.
  // Calculate a PC-relative address. Unlike for branches the offset in adr is
  // unscaled (i.e. the result can be unaligned).

  // Calculate the address of a label.
  void adr(const Register& xd, Label* label);

  // Calculate the address of a PC offset.
  void adr(const Register& xd, int64_t imm21);

  // Calculate the page address of a label.
  void adrp(const Register& xd, Label* label);

  // Calculate the page address of a PC offset.
  void adrp(const Register& xd, int64_t imm21);

  // Data Processing instructions.
  // Add.
  void add(const Register& rd, const Register& rn, const Operand& operand);

  // Add and update status flags.
  void adds(const Register& rd, const Register& rn, const Operand& operand);

  // Compare negative.
  void cmn(const Register& rn, const Operand& operand);

  // Subtract.
  void sub(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract and update status flags.
  void subs(const Register& rd, const Register& rn, const Operand& operand);

  // Compare.
  void cmp(const Register& rn, const Operand& operand);

  // Negate.
  void neg(const Register& rd, const Operand& operand);

  // Negate and update status flags.
  void negs(const Register& rd, const Operand& operand);

  // Add with carry bit.
  void adc(const Register& rd, const Register& rn, const Operand& operand);

  // Add with carry bit and update status flags.
  void adcs(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit.
  void sbc(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit and update status flags.
  void sbcs(const Register& rd, const Register& rn, const Operand& operand);

  // Negate with carry bit.
  void ngc(const Register& rd, const Operand& operand);

  // Negate with carry bit and update status flags.
  void ngcs(const Register& rd, const Operand& operand);

  // Logical instructions.
  // Bitwise and (A & B).
  void and_(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise and (A & B) and update status flags.
  void ands(const Register& rd, const Register& rn, const Operand& operand);

  // Bit test and set flags.
  void tst(const Register& rn, const Operand& operand);

  // Bit clear (A & ~B).
  void bic(const Register& rd, const Register& rn, const Operand& operand);

  // Bit clear (A & ~B) and update status flags.
  void bics(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or (A | B).
  void orr(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or-not (A | ~B).
  void orn(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor/xor (A ^ B).
  void eor(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor-not/xnor (A ^ ~B).
  void eon(const Register& rd, const Register& rn, const Operand& operand);

  // Logical shift left by variable.
  void lslv(const Register& rd, const Register& rn, const Register& rm);

  // Logical shift right by variable.
  void lsrv(const Register& rd, const Register& rn, const Register& rm);

  // Arithmetic shift right by variable.
  void asrv(const Register& rd, const Register& rn, const Register& rm);

  // Rotate right by variable.
  void rorv(const Register& rd, const Register& rn, const Register& rm);

  // Bitfield instructions.
  // Bitfield move.
  void bfm(const Register& rd,
           const Register& rn,
           unsigned immr,
           unsigned imms);

  // Signed bitfield move.
  void sbfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Unsigned bitfield move.
  void ubfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Bfm aliases.
  // Bitfield insert.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    // immr = (reg_size - lsb) mod reg_size, computed with a mask since
    // reg_size is a power of two.
    bfm(rd,
        rn,
        (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
        width - 1);
  }

  // Bitfield extract and insert low.
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd, rn, lsb, lsb + width - 1);
  }

  // Sbfm aliases.
  // Arithmetic shift right.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Signed bitfield insert with zero at right.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Signed bitfield extract.
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd, rn, lsb, lsb + width - 1);
  }

  // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }

  // Ubfm aliases.
  // Logical shift left.
  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    unsigned reg_size = rd.GetSizeInBits();
    VIXL_ASSERT(shift < reg_size);
    // lsl #s is ubfm with immr = reg_size - s (mod reg_size) and
    // imms = reg_size - s - 1.
    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
  }

  // Logical shift right.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Unsigned bitfield insert with zero at right.
  void ubfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Unsigned bitfield extract.
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd, rn, lsb, lsb + width - 1);
  }

  // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }

  // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }

  // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }

  // Extract.
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);

  // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    // ror is an alias of extr with both sources set to the same register.
    extr(rd, rs, rs, shift);
  }

  // Conditional comparison.
  // Conditional compare negative.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32 C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);

  // Multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& xd, const Register& wn, const Register& wm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    // umull is umaddl with a zero accumulator.
    umaddl(xd, wn, wm, xzr);
  }

  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd, const Register& xn, const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& xd, const Register& xn);

  // Reverse bytes.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits.
  void cls(const Register& rd, const Register& rn);

  // Memory instructions.
  // Load integer or FP register.
  void ldr(const CPURegister& rt,
           const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt,
           const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& xt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
995 void ldrb(const Register& rt, 996 const MemOperand& src, 997 LoadStoreScalingOption option = PreferScaledOffset); 998 999 // Store byte. 1000 void strb(const Register& rt, 1001 const MemOperand& dst, 1002 LoadStoreScalingOption option = PreferScaledOffset); 1003 1004 // Load byte with sign extension. 1005 void ldrsb(const Register& rt, 1006 const MemOperand& src, 1007 LoadStoreScalingOption option = PreferScaledOffset); 1008 1009 // Load half-word. 1010 void ldrh(const Register& rt, 1011 const MemOperand& src, 1012 LoadStoreScalingOption option = PreferScaledOffset); 1013 1014 // Store half-word. 1015 void strh(const Register& rt, 1016 const MemOperand& dst, 1017 LoadStoreScalingOption option = PreferScaledOffset); 1018 1019 // Load half-word with sign extension. 1020 void ldrsh(const Register& rt, 1021 const MemOperand& src, 1022 LoadStoreScalingOption option = PreferScaledOffset); 1023 1024 // Load integer or FP register (with unscaled offset). 1025 void ldur(const CPURegister& rt, 1026 const MemOperand& src, 1027 LoadStoreScalingOption option = PreferUnscaledOffset); 1028 1029 // Store integer or FP register (with unscaled offset). 1030 void stur(const CPURegister& rt, 1031 const MemOperand& src, 1032 LoadStoreScalingOption option = PreferUnscaledOffset); 1033 1034 // Load word with sign extension. 1035 void ldursw(const Register& xt, 1036 const MemOperand& src, 1037 LoadStoreScalingOption option = PreferUnscaledOffset); 1038 1039 // Load byte (with unscaled offset). 1040 void ldurb(const Register& rt, 1041 const MemOperand& src, 1042 LoadStoreScalingOption option = PreferUnscaledOffset); 1043 1044 // Store byte (with unscaled offset). 1045 void sturb(const Register& rt, 1046 const MemOperand& dst, 1047 LoadStoreScalingOption option = PreferUnscaledOffset); 1048 1049 // Load byte with sign extension (and unscaled offset). 
1050 void ldursb(const Register& rt, 1051 const MemOperand& src, 1052 LoadStoreScalingOption option = PreferUnscaledOffset); 1053 1054 // Load half-word (with unscaled offset). 1055 void ldurh(const Register& rt, 1056 const MemOperand& src, 1057 LoadStoreScalingOption option = PreferUnscaledOffset); 1058 1059 // Store half-word (with unscaled offset). 1060 void sturh(const Register& rt, 1061 const MemOperand& dst, 1062 LoadStoreScalingOption option = PreferUnscaledOffset); 1063 1064 // Load half-word with sign extension (and unscaled offset). 1065 void ldursh(const Register& rt, 1066 const MemOperand& src, 1067 LoadStoreScalingOption option = PreferUnscaledOffset); 1068 1069 // Load integer or FP register pair. 1070 void ldp(const CPURegister& rt, 1071 const CPURegister& rt2, 1072 const MemOperand& src); 1073 1074 // Store integer or FP register pair. 1075 void stp(const CPURegister& rt, 1076 const CPURegister& rt2, 1077 const MemOperand& dst); 1078 1079 // Load word pair with sign extension. 1080 void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src); 1081 1082 // Load integer or FP register pair, non-temporal. 1083 void ldnp(const CPURegister& rt, 1084 const CPURegister& rt2, 1085 const MemOperand& src); 1086 1087 // Store integer or FP register pair, non-temporal. 1088 void stnp(const CPURegister& rt, 1089 const CPURegister& rt2, 1090 const MemOperand& dst); 1091 1092 // Load integer or FP register from literal pool. 1093 void ldr(const CPURegister& rt, RawLiteral* literal); 1094 1095 // Load word with sign extension from literal pool. 1096 void ldrsw(const Register& xt, RawLiteral* literal); 1097 1098 // Load integer or FP register from pc + imm19 << 2. 1099 void ldr(const CPURegister& rt, int64_t imm19); 1100 1101 // Load word with sign extension from pc + imm19 << 2. 1102 void ldrsw(const Register& xt, int64_t imm19); 1103 1104 // Store exclusive byte. 
1105 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst); 1106 1107 // Store exclusive half-word. 1108 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst); 1109 1110 // Store exclusive register. 1111 void stxr(const Register& rs, const Register& rt, const MemOperand& dst); 1112 1113 // Load exclusive byte. 1114 void ldxrb(const Register& rt, const MemOperand& src); 1115 1116 // Load exclusive half-word. 1117 void ldxrh(const Register& rt, const MemOperand& src); 1118 1119 // Load exclusive register. 1120 void ldxr(const Register& rt, const MemOperand& src); 1121 1122 // Store exclusive register pair. 1123 void stxp(const Register& rs, 1124 const Register& rt, 1125 const Register& rt2, 1126 const MemOperand& dst); 1127 1128 // Load exclusive register pair. 1129 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src); 1130 1131 // Store-release exclusive byte. 1132 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst); 1133 1134 // Store-release exclusive half-word. 1135 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst); 1136 1137 // Store-release exclusive register. 1138 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst); 1139 1140 // Load-acquire exclusive byte. 1141 void ldaxrb(const Register& rt, const MemOperand& src); 1142 1143 // Load-acquire exclusive half-word. 1144 void ldaxrh(const Register& rt, const MemOperand& src); 1145 1146 // Load-acquire exclusive register. 1147 void ldaxr(const Register& rt, const MemOperand& src); 1148 1149 // Store-release exclusive register pair. 1150 void stlxp(const Register& rs, 1151 const Register& rt, 1152 const Register& rt2, 1153 const MemOperand& dst); 1154 1155 // Load-acquire exclusive register pair. 1156 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src); 1157 1158 // Store-release byte. 
1159 void stlrb(const Register& rt, const MemOperand& dst);
1160
1161 // Store-release half-word.
1162 void stlrh(const Register& rt, const MemOperand& dst);
1163
1164 // Store-release register.
1165 void stlr(const Register& rt, const MemOperand& dst);
1166
1167 // Load-acquire byte.
1168 void ldarb(const Register& rt, const MemOperand& src);
1169
1170 // Load-acquire half-word.
1171 void ldarh(const Register& rt, const MemOperand& src);
1172
1173 // Load-acquire register.
1174 void ldar(const Register& rt, const MemOperand& src);
1175
1176 // Prefetch memory.
1177 void prfm(PrefetchOperation op,
1178 const MemOperand& addr,
1179 LoadStoreScalingOption option = PreferScaledOffset);
1180
1181 // Prefetch memory (with unscaled offset).
1182 void prfum(PrefetchOperation op,
1183 const MemOperand& addr,
1184 LoadStoreScalingOption option = PreferUnscaledOffset);
1185
1186 // Prefetch memory in the literal pool.
1187 void prfm(PrefetchOperation op, RawLiteral* literal);
1188
1189 // Prefetch from pc + imm19 << 2.
1190 void prfm(PrefetchOperation op, int64_t imm19);
1191
1192 // Move instructions. The default shift of -1 indicates that the move
1193 // instruction will calculate an appropriate 16-bit immediate and left shift
1194 // that is equal to the 64-bit immediate argument. If an explicit left shift
1195 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
1196 //
1197 // For movk, an explicit shift can be used to indicate which half word should
1198 // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
1199 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
1200 // most-significant.
1201
1202 // Move immediate and keep.
1203 void movk(const Register& rd, uint64_t imm, int shift = -1) {
1204 MoveWide(rd, imm, shift, MOVK);
1205 }
1206
1207 // Move inverted immediate.
1208 void movn(const Register& rd, uint64_t imm, int shift = -1) {
1209 MoveWide(rd, imm, shift, MOVN);
1210 }
1211
1212 // Move immediate.
1213 void movz(const Register& rd, uint64_t imm, int shift = -1) {
1214 MoveWide(rd, imm, shift, MOVZ); // Shift semantics as described above movk.
1215 }
1216
1217 // Misc instructions.
1218 // Monitor debug-mode breakpoint.
1219 void brk(int code);
1220
1221 // Halting debug-mode breakpoint.
1222 void hlt(int code);
1223
1224 // Generate exception targeting EL1.
1225 void svc(int code);
1226
1227 // Move register to register.
1228 void mov(const Register& rd, const Register& rn);
1229
1230 // Move inverted operand to register.
1231 void mvn(const Register& rd, const Operand& operand);
1232
1233 // System instructions.
1234 // Move to register from system register.
1235 void mrs(const Register& xt, SystemRegister sysreg);
1236
1237 // Move from register to system register.
1238 void msr(SystemRegister sysreg, const Register& xt);
1239
1240 // System instruction.
1241 void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
1242
1243 // System instruction with pre-encoded op (op1:crn:crm:op2).
1244 void sys(int op, const Register& xt = xzr);
1245
1246 // System data cache operation.
1247 void dc(DataCacheOp op, const Register& rt);
1248
1249 // System instruction cache operation.
1250 void ic(InstructionCacheOp op, const Register& rt);
1251
1252 // System hint.
1253 void hint(SystemHint code);
1254
1255 // Clear exclusive monitor.
1256 void clrex(int imm4 = 0xf);
1257
1258 // Data memory barrier.
1259 void dmb(BarrierDomain domain, BarrierType type);
1260
1261 // Data synchronization barrier.
1262 void dsb(BarrierDomain domain, BarrierType type);
1263
1264 // Instruction synchronization barrier.
1265 void isb();
1266
1267 // Alias for system instructions.
1268 // No-op.
1269 void nop() { hint(NOP); } // Implemented as a system hint.
1270
1271 // FP and NEON instructions.
1272 // Move double precision immediate to FP register.
1273 void fmov(const VRegister& vd, double imm);
1274
1275 // Move single precision immediate to FP register.
1276 void fmov(const VRegister& vd, float imm);
1277
1278 // Move FP register to register.
1279 void fmov(const Register& rd, const VRegister& fn); 1280 1281 // Move register to FP register. 1282 void fmov(const VRegister& vd, const Register& rn); 1283 1284 // Move FP register to FP register. 1285 void fmov(const VRegister& vd, const VRegister& fn); 1286 1287 // Move 64-bit register to top half of 128-bit FP register. 1288 void fmov(const VRegister& vd, int index, const Register& rn); 1289 1290 // Move top half of 128-bit FP register to 64-bit register. 1291 void fmov(const Register& rd, const VRegister& vn, int index); 1292 1293 // FP add. 1294 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1295 1296 // FP subtract. 1297 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1298 1299 // FP multiply. 1300 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1301 1302 // FP fused multiply-add. 1303 void fmadd(const VRegister& vd, 1304 const VRegister& vn, 1305 const VRegister& vm, 1306 const VRegister& va); 1307 1308 // FP fused multiply-subtract. 1309 void fmsub(const VRegister& vd, 1310 const VRegister& vn, 1311 const VRegister& vm, 1312 const VRegister& va); 1313 1314 // FP fused multiply-add and negate. 1315 void fnmadd(const VRegister& vd, 1316 const VRegister& vn, 1317 const VRegister& vm, 1318 const VRegister& va); 1319 1320 // FP fused multiply-subtract and negate. 1321 void fnmsub(const VRegister& vd, 1322 const VRegister& vn, 1323 const VRegister& vm, 1324 const VRegister& va); 1325 1326 // FP multiply-negate scalar. 1327 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1328 1329 // FP reciprocal exponent scalar. 1330 void frecpx(const VRegister& vd, const VRegister& vn); 1331 1332 // FP divide. 1333 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1334 1335 // FP maximum. 1336 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1337 1338 // FP minimum. 
1339 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1340 1341 // FP maximum number. 1342 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1343 1344 // FP minimum number. 1345 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1346 1347 // FP absolute. 1348 void fabs(const VRegister& vd, const VRegister& vn); 1349 1350 // FP negate. 1351 void fneg(const VRegister& vd, const VRegister& vn); 1352 1353 // FP square root. 1354 void fsqrt(const VRegister& vd, const VRegister& vn); 1355 1356 // FP round to integer, nearest with ties to away. 1357 void frinta(const VRegister& vd, const VRegister& vn); 1358 1359 // FP round to integer, implicit rounding. 1360 void frinti(const VRegister& vd, const VRegister& vn); 1361 1362 // FP round to integer, toward minus infinity. 1363 void frintm(const VRegister& vd, const VRegister& vn); 1364 1365 // FP round to integer, nearest with ties to even. 1366 void frintn(const VRegister& vd, const VRegister& vn); 1367 1368 // FP round to integer, toward plus infinity. 1369 void frintp(const VRegister& vd, const VRegister& vn); 1370 1371 // FP round to integer, exact, implicit rounding. 1372 void frintx(const VRegister& vd, const VRegister& vn); 1373 1374 // FP round to integer, towards zero. 1375 void frintz(const VRegister& vd, const VRegister& vn); 1376 1377 void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap); 1378 1379 void FPCompareMacro(const VRegister& vn, 1380 const VRegister& vm, 1381 FPTrapFlags trap); 1382 1383 // FP compare registers. 1384 void fcmp(const VRegister& vn, const VRegister& vm); 1385 1386 // FP compare immediate. 1387 void fcmp(const VRegister& vn, double value); 1388 1389 void FPCCompareMacro(const VRegister& vn, 1390 const VRegister& vm, 1391 StatusFlags nzcv, 1392 Condition cond, 1393 FPTrapFlags trap); 1394 1395 // FP conditional compare. 
1396 void fccmp(const VRegister& vn,
1397 const VRegister& vm,
1398 StatusFlags nzcv,
1399 Condition cond);
1400
1401 // FP signaling compare registers.
1402 void fcmpe(const VRegister& vn, const VRegister& vm);
1403
1404 // FP signaling compare immediate.
1405 void fcmpe(const VRegister& vn, double value);
1406
1407 // FP conditional signaling compare.
1408 void fccmpe(const VRegister& vn,
1409 const VRegister& vm,
1410 StatusFlags nzcv,
1411 Condition cond);
1412
1413 // FP conditional select.
1414 void fcsel(const VRegister& vd,
1415 const VRegister& vn,
1416 const VRegister& vm,
1417 Condition cond);
1418
1419 // Common FP Convert functions.
1420 void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
1421 void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
1422
1423 // FP convert between precisions.
1424 void fcvt(const VRegister& vd, const VRegister& vn);
1425
1426 // FP convert to higher precision.
1427 void fcvtl(const VRegister& vd, const VRegister& vn);
1428
1429 // FP convert to higher precision (second part).
1430 void fcvtl2(const VRegister& vd, const VRegister& vn);
1431
1432 // FP convert to lower precision.
1433 void fcvtn(const VRegister& vd, const VRegister& vn);
1434
1435 // FP convert to lower precision (second part).
1436 void fcvtn2(const VRegister& vd, const VRegister& vn);
1437
1438 // FP convert to lower precision, rounding to odd.
1439 void fcvtxn(const VRegister& vd, const VRegister& vn);
1440
1441 // FP convert to lower precision, rounding to odd (second part).
1442 void fcvtxn2(const VRegister& vd, const VRegister& vn);
1443
1444 // FP convert to signed integer, nearest with ties to away.
1445 void fcvtas(const Register& rd, const VRegister& vn);
1446
1447 // FP convert to unsigned integer, nearest with ties to away.
1448 void fcvtau(const Register& rd, const VRegister& vn);
1449
1450 // FP convert to signed integer, nearest with ties to away.
1451 void fcvtas(const VRegister& vd, const VRegister& vn); 1452 1453 // FP convert to unsigned integer, nearest with ties to away. 1454 void fcvtau(const VRegister& vd, const VRegister& vn); 1455 1456 // FP convert to signed integer, round towards -infinity. 1457 void fcvtms(const Register& rd, const VRegister& vn); 1458 1459 // FP convert to unsigned integer, round towards -infinity. 1460 void fcvtmu(const Register& rd, const VRegister& vn); 1461 1462 // FP convert to signed integer, round towards -infinity. 1463 void fcvtms(const VRegister& vd, const VRegister& vn); 1464 1465 // FP convert to unsigned integer, round towards -infinity. 1466 void fcvtmu(const VRegister& vd, const VRegister& vn); 1467 1468 // FP convert to signed integer, nearest with ties to even. 1469 void fcvtns(const Register& rd, const VRegister& vn); 1470 1471 // FP convert to unsigned integer, nearest with ties to even. 1472 void fcvtnu(const Register& rd, const VRegister& vn); 1473 1474 // FP convert to signed integer, nearest with ties to even. 1475 void fcvtns(const VRegister& rd, const VRegister& vn); 1476 1477 // FP convert to unsigned integer, nearest with ties to even. 1478 void fcvtnu(const VRegister& rd, const VRegister& vn); 1479 1480 // FP convert to signed integer or fixed-point, round towards zero. 1481 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); 1482 1483 // FP convert to unsigned integer or fixed-point, round towards zero. 1484 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); 1485 1486 // FP convert to signed integer or fixed-point, round towards zero. 1487 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); 1488 1489 // FP convert to unsigned integer or fixed-point, round towards zero. 1490 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0); 1491 1492 // FP convert to signed integer, round towards +infinity. 
1493 void fcvtps(const Register& rd, const VRegister& vn); 1494 1495 // FP convert to unsigned integer, round towards +infinity. 1496 void fcvtpu(const Register& rd, const VRegister& vn); 1497 1498 // FP convert to signed integer, round towards +infinity. 1499 void fcvtps(const VRegister& vd, const VRegister& vn); 1500 1501 // FP convert to unsigned integer, round towards +infinity. 1502 void fcvtpu(const VRegister& vd, const VRegister& vn); 1503 1504 // Convert signed integer or fixed point to FP. 1505 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0); 1506 1507 // Convert unsigned integer or fixed point to FP. 1508 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0); 1509 1510 // Convert signed integer or fixed-point to FP. 1511 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); 1512 1513 // Convert unsigned integer or fixed-point to FP. 1514 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); 1515 1516 // Unsigned absolute difference. 1517 void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1518 1519 // Signed absolute difference. 1520 void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1521 1522 // Unsigned absolute difference and accumulate. 1523 void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1524 1525 // Signed absolute difference and accumulate. 1526 void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1527 1528 // Add. 1529 void add(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1530 1531 // Subtract. 1532 void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1533 1534 // Unsigned halving add. 1535 void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1536 1537 // Signed halving add. 1538 void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1539 1540 // Unsigned rounding halving add. 
1541 void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1542 1543 // Signed rounding halving add. 1544 void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1545 1546 // Unsigned halving sub. 1547 void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1548 1549 // Signed halving sub. 1550 void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1551 1552 // Unsigned saturating add. 1553 void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1554 1555 // Signed saturating add. 1556 void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1557 1558 // Unsigned saturating subtract. 1559 void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1560 1561 // Signed saturating subtract. 1562 void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1563 1564 // Add pairwise. 1565 void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1566 1567 // Add pair of elements scalar. 1568 void addp(const VRegister& vd, const VRegister& vn); 1569 1570 // Multiply-add to accumulator. 1571 void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1572 1573 // Multiply-subtract to accumulator. 1574 void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1575 1576 // Multiply. 1577 void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1578 1579 // Multiply by scalar element. 1580 void mul(const VRegister& vd, 1581 const VRegister& vn, 1582 const VRegister& vm, 1583 int vm_index); 1584 1585 // Multiply-add by scalar element. 1586 void mla(const VRegister& vd, 1587 const VRegister& vn, 1588 const VRegister& vm, 1589 int vm_index); 1590 1591 // Multiply-subtract by scalar element. 1592 void mls(const VRegister& vd, 1593 const VRegister& vn, 1594 const VRegister& vm, 1595 int vm_index); 1596 1597 // Signed long multiply-add by scalar element. 
1598 void smlal(const VRegister& vd, 1599 const VRegister& vn, 1600 const VRegister& vm, 1601 int vm_index); 1602 1603 // Signed long multiply-add by scalar element (second part). 1604 void smlal2(const VRegister& vd, 1605 const VRegister& vn, 1606 const VRegister& vm, 1607 int vm_index); 1608 1609 // Unsigned long multiply-add by scalar element. 1610 void umlal(const VRegister& vd, 1611 const VRegister& vn, 1612 const VRegister& vm, 1613 int vm_index); 1614 1615 // Unsigned long multiply-add by scalar element (second part). 1616 void umlal2(const VRegister& vd, 1617 const VRegister& vn, 1618 const VRegister& vm, 1619 int vm_index); 1620 1621 // Signed long multiply-sub by scalar element. 1622 void smlsl(const VRegister& vd, 1623 const VRegister& vn, 1624 const VRegister& vm, 1625 int vm_index); 1626 1627 // Signed long multiply-sub by scalar element (second part). 1628 void smlsl2(const VRegister& vd, 1629 const VRegister& vn, 1630 const VRegister& vm, 1631 int vm_index); 1632 1633 // Unsigned long multiply-sub by scalar element. 1634 void umlsl(const VRegister& vd, 1635 const VRegister& vn, 1636 const VRegister& vm, 1637 int vm_index); 1638 1639 // Unsigned long multiply-sub by scalar element (second part). 1640 void umlsl2(const VRegister& vd, 1641 const VRegister& vn, 1642 const VRegister& vm, 1643 int vm_index); 1644 1645 // Signed long multiply by scalar element. 1646 void smull(const VRegister& vd, 1647 const VRegister& vn, 1648 const VRegister& vm, 1649 int vm_index); 1650 1651 // Signed long multiply by scalar element (second part). 1652 void smull2(const VRegister& vd, 1653 const VRegister& vn, 1654 const VRegister& vm, 1655 int vm_index); 1656 1657 // Unsigned long multiply by scalar element. 1658 void umull(const VRegister& vd, 1659 const VRegister& vn, 1660 const VRegister& vm, 1661 int vm_index); 1662 1663 // Unsigned long multiply by scalar element (second part). 
1664 void umull2(const VRegister& vd,
1665 const VRegister& vn,
1666 const VRegister& vm,
1667 int vm_index);
1668
1669 // Signed saturating doubling long multiply by element.
1670 void sqdmull(const VRegister& vd,
1671 const VRegister& vn,
1672 const VRegister& vm,
1673 int vm_index);
1674
1675 // Signed saturating doubling long multiply by element (second part).
1676 void sqdmull2(const VRegister& vd,
1677 const VRegister& vn,
1678 const VRegister& vm,
1679 int vm_index);
1680
1681 // Signed saturating doubling long multiply-add by element.
1682 void sqdmlal(const VRegister& vd,
1683 const VRegister& vn,
1684 const VRegister& vm,
1685 int vm_index);
1686
1687 // Signed saturating doubling long multiply-add by element (second part).
1688 void sqdmlal2(const VRegister& vd,
1689 const VRegister& vn,
1690 const VRegister& vm,
1691 int vm_index);
1692
1693 // Signed saturating doubling long multiply-sub by element.
1694 void sqdmlsl(const VRegister& vd,
1695 const VRegister& vn,
1696 const VRegister& vm,
1697 int vm_index);
1698
1699 // Signed saturating doubling long multiply-sub by element (second part).
1700 void sqdmlsl2(const VRegister& vd,
1701 const VRegister& vn,
1702 const VRegister& vm,
1703 int vm_index);
1704
1705 // Compare equal.
1706 void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1707
1708 // Compare signed greater than or equal.
1709 void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1710
1711 // Compare signed greater than.
1712 void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1713
1714 // Compare unsigned higher.
1715 void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1716
1717 // Compare unsigned higher or same.
1718 void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1719
1720 // Compare bitwise test bits nonzero.
1721 void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1722
1723 // Compare bitwise to zero.
1724 void cmeq(const VRegister& vd, const VRegister& vn, int value); 1725 1726 // Compare signed greater than or equal to zero. 1727 void cmge(const VRegister& vd, const VRegister& vn, int value); 1728 1729 // Compare signed greater than zero. 1730 void cmgt(const VRegister& vd, const VRegister& vn, int value); 1731 1732 // Compare signed less than or equal to zero. 1733 void cmle(const VRegister& vd, const VRegister& vn, int value); 1734 1735 // Compare signed less than zero. 1736 void cmlt(const VRegister& vd, const VRegister& vn, int value); 1737 1738 // Signed shift left by register. 1739 void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1740 1741 // Unsigned shift left by register. 1742 void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1743 1744 // Signed saturating shift left by register. 1745 void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1746 1747 // Unsigned saturating shift left by register. 1748 void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1749 1750 // Signed rounding shift left by register. 1751 void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1752 1753 // Unsigned rounding shift left by register. 1754 void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1755 1756 // Signed saturating rounding shift left by register. 1757 void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1758 1759 // Unsigned saturating rounding shift left by register. 1760 void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1761 1762 // Bitwise and. 1763 void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1764 1765 // Bitwise or. 1766 void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1767 1768 // Bitwise or immediate. 1769 void orr(const VRegister& vd, const int imm8, const int left_shift = 0); 1770 1771 // Move register to register. 
1772 void mov(const VRegister& vd, const VRegister& vn); 1773 1774 // Bitwise orn. 1775 void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1776 1777 // Bitwise eor. 1778 void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1779 1780 // Bit clear immediate. 1781 void bic(const VRegister& vd, const int imm8, const int left_shift = 0); 1782 1783 // Bit clear. 1784 void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1785 1786 // Bitwise insert if false. 1787 void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1788 1789 // Bitwise insert if true. 1790 void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1791 1792 // Bitwise select. 1793 void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1794 1795 // Polynomial multiply. 1796 void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1797 1798 // Vector move immediate. 1799 void movi(const VRegister& vd, 1800 const uint64_t imm, 1801 Shift shift = LSL, 1802 const int shift_amount = 0); 1803 1804 // Bitwise not. 1805 void mvn(const VRegister& vd, const VRegister& vn); 1806 1807 // Vector move inverted immediate. 1808 void mvni(const VRegister& vd, 1809 const int imm8, 1810 Shift shift = LSL, 1811 const int shift_amount = 0); 1812 1813 // Signed saturating accumulate of unsigned value. 1814 void suqadd(const VRegister& vd, const VRegister& vn); 1815 1816 // Unsigned saturating accumulate of signed value. 1817 void usqadd(const VRegister& vd, const VRegister& vn); 1818 1819 // Absolute value. 1820 void abs(const VRegister& vd, const VRegister& vn); 1821 1822 // Signed saturating absolute value. 1823 void sqabs(const VRegister& vd, const VRegister& vn); 1824 1825 // Negate. 1826 void neg(const VRegister& vd, const VRegister& vn); 1827 1828 // Signed saturating negate. 1829 void sqneg(const VRegister& vd, const VRegister& vn); 1830 1831 // Bitwise not. 
  // NEON / Advanced SIMD instruction declarations (continued).
  // Note: `not` is a C++ operator keyword, hence the trailing underscore.
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);

  // Extract vector from pair of vectors.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element.
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);

  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);

  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);

  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);

  // One-element structure store from one register.
  void st1(const VRegister& vt, const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply element returning high half.
  void sqdmulh(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating rounding doubling multiply element returning high half.
  void sqrdmulh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Unsigned long multiply.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half.
  void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half (second part).
  void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half.
  void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half (second part).
  void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half.
  void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half (second part).
  void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half.
  void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half (second part).
  void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP vector multiply accumulate.
  void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP vector multiply subtract.
  void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP vector multiply extended.
  void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute greater than or equal.
  void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute greater than.
  void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP multiply by element.
  void fmul(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-add to accumulator by element.
  void fmla(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-sub from accumulator by element.
  void fmls(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP multiply extended by element.
  void fmulx(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP compare equal.
  void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than.
  void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than or equal.
  void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP compare equal to zero.
  void fcmeq(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than zero.
  void fcmgt(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than or equal to zero.
  void fcmge(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than or equal to zero.
  void fcmle(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than zero.
  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);

  // FP absolute difference.
  void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add vector.
  void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add scalar.
  void faddp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum vector.
  void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum scalar.
  void fmaxp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum vector.
  void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum scalar.
  void fminp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum number vector.
2542 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2543 2544 // FP pairwise maximum number scalar. 2545 void fmaxnmp(const VRegister& vd, const VRegister& vn); 2546 2547 // FP pairwise minimum number vector. 2548 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2549 2550 // FP pairwise minimum number scalar. 2551 void fminnmp(const VRegister& vd, const VRegister& vn); 2552 2553 // Emit generic instructions. 2554 // Emit raw instructions into the instruction stream. 2555 void dci(Instr raw_inst) { Emit(raw_inst); } 2556 2557 // Emit 32 bits of data into the instruction stream. 2558 void dc32(uint32_t data) { dc(data); } 2559 2560 // Emit 64 bits of data into the instruction stream. 2561 void dc64(uint64_t data) { dc(data); } 2562 2563 // Emit data in the instruction stream. 2564 template <typename T> 2565 void dc(T data) { 2566 VIXL_ASSERT(AllowAssembler()); 2567 GetBuffer()->Emit<T>(data); 2568 } 2569 2570 // Copy a string into the instruction stream, including the terminating NULL 2571 // character. The instruction pointer is then aligned correctly for 2572 // subsequent instructions. 2573 void EmitString(const char* string) { 2574 VIXL_ASSERT(string != NULL); 2575 VIXL_ASSERT(AllowAssembler()); 2576 2577 GetBuffer()->EmitString(string); 2578 GetBuffer()->Align(); 2579 } 2580 2581 // Code generation helpers. 2582 2583 // Register encoding. 
2584 static Instr Rd(CPURegister rd) { 2585 VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode); 2586 return rd.GetCode() << Rd_offset; 2587 } 2588 2589 static Instr Rn(CPURegister rn) { 2590 VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode); 2591 return rn.GetCode() << Rn_offset; 2592 } 2593 2594 static Instr Rm(CPURegister rm) { 2595 VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode); 2596 return rm.GetCode() << Rm_offset; 2597 } 2598 2599 static Instr RmNot31(CPURegister rm) { 2600 VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode); 2601 VIXL_ASSERT(!rm.IsZero()); 2602 return Rm(rm); 2603 } 2604 2605 static Instr Ra(CPURegister ra) { 2606 VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode); 2607 return ra.GetCode() << Ra_offset; 2608 } 2609 2610 static Instr Rt(CPURegister rt) { 2611 VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode); 2612 return rt.GetCode() << Rt_offset; 2613 } 2614 2615 static Instr Rt2(CPURegister rt2) { 2616 VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode); 2617 return rt2.GetCode() << Rt2_offset; 2618 } 2619 2620 static Instr Rs(CPURegister rs) { 2621 VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode); 2622 return rs.GetCode() << Rs_offset; 2623 } 2624 2625 // These encoding functions allow the stack pointer to be encoded, and 2626 // disallow the zero register. 2627 static Instr RdSP(Register rd) { 2628 VIXL_ASSERT(!rd.IsZero()); 2629 return (rd.GetCode() & kRegCodeMask) << Rd_offset; 2630 } 2631 2632 static Instr RnSP(Register rn) { 2633 VIXL_ASSERT(!rn.IsZero()); 2634 return (rn.GetCode() & kRegCodeMask) << Rn_offset; 2635 } 2636 2637 // Flags encoding. 2638 static Instr Flags(FlagsUpdate S) { 2639 if (S == SetFlags) { 2640 return 1 << FlagsUpdate_offset; 2641 } else if (S == LeaveFlags) { 2642 return 0 << FlagsUpdate_offset; 2643 } 2644 VIXL_UNREACHABLE(); 2645 return 0; 2646 } 2647 2648 static Instr Cond(Condition cond) { return cond << Condition_offset; } 2649 2650 // PC-relative address encoding. 
2651 static Instr ImmPCRelAddress(int64_t imm21) { 2652 VIXL_ASSERT(IsInt21(imm21)); 2653 Instr imm = static_cast<Instr>(TruncateToUint21(imm21)); 2654 Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset; 2655 Instr immlo = imm << ImmPCRelLo_offset; 2656 return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask); 2657 } 2658 2659 // Branch encoding. 2660 static Instr ImmUncondBranch(int64_t imm26) { 2661 VIXL_ASSERT(IsInt26(imm26)); 2662 return TruncateToUint26(imm26) << ImmUncondBranch_offset; 2663 } 2664 2665 static Instr ImmCondBranch(int64_t imm19) { 2666 VIXL_ASSERT(IsInt19(imm19)); 2667 return TruncateToUint19(imm19) << ImmCondBranch_offset; 2668 } 2669 2670 static Instr ImmCmpBranch(int64_t imm19) { 2671 VIXL_ASSERT(IsInt19(imm19)); 2672 return TruncateToUint19(imm19) << ImmCmpBranch_offset; 2673 } 2674 2675 static Instr ImmTestBranch(int64_t imm14) { 2676 VIXL_ASSERT(IsInt14(imm14)); 2677 return TruncateToUint14(imm14) << ImmTestBranch_offset; 2678 } 2679 2680 static Instr ImmTestBranchBit(unsigned bit_pos) { 2681 VIXL_ASSERT(IsUint6(bit_pos)); 2682 // Subtract five from the shift offset, as we need bit 5 from bit_pos. 2683 unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5); 2684 unsigned b40 = bit_pos << ImmTestBranchBit40_offset; 2685 b5 &= ImmTestBranchBit5_mask; 2686 b40 &= ImmTestBranchBit40_mask; 2687 return b5 | b40; 2688 } 2689 2690 // Data Processing encoding. 2691 static Instr SF(Register rd) { 2692 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits; 2693 } 2694 2695 static Instr ImmAddSub(int imm) { 2696 VIXL_ASSERT(IsImmAddSub(imm)); 2697 if (IsUint12(imm)) { // No shift required. 
2698 imm <<= ImmAddSub_offset; 2699 } else { 2700 imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset); 2701 } 2702 return imm; 2703 } 2704 2705 static Instr ImmS(unsigned imms, unsigned reg_size) { 2706 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) || 2707 ((reg_size == kWRegSize) && IsUint5(imms))); 2708 USE(reg_size); 2709 return imms << ImmS_offset; 2710 } 2711 2712 static Instr ImmR(unsigned immr, unsigned reg_size) { 2713 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) || 2714 ((reg_size == kWRegSize) && IsUint5(immr))); 2715 USE(reg_size); 2716 VIXL_ASSERT(IsUint6(immr)); 2717 return immr << ImmR_offset; 2718 } 2719 2720 static Instr ImmSetBits(unsigned imms, unsigned reg_size) { 2721 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 2722 VIXL_ASSERT(IsUint6(imms)); 2723 VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3)); 2724 USE(reg_size); 2725 return imms << ImmSetBits_offset; 2726 } 2727 2728 static Instr ImmRotate(unsigned immr, unsigned reg_size) { 2729 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 2730 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) || 2731 ((reg_size == kWRegSize) && IsUint5(immr))); 2732 USE(reg_size); 2733 return immr << ImmRotate_offset; 2734 } 2735 2736 static Instr ImmLLiteral(int64_t imm19) { 2737 VIXL_ASSERT(IsInt19(imm19)); 2738 return TruncateToUint19(imm19) << ImmLLiteral_offset; 2739 } 2740 2741 static Instr BitN(unsigned bitn, unsigned reg_size) { 2742 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 2743 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0)); 2744 USE(reg_size); 2745 return bitn << BitN_offset; 2746 } 2747 2748 static Instr ShiftDP(Shift shift) { 2749 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR); 2750 return shift << ShiftDP_offset; 2751 } 2752 2753 static Instr ImmDPShift(unsigned amount) { 2754 VIXL_ASSERT(IsUint6(amount)); 2755 return amount << ImmDPShift_offset; 2756 } 2757 2758 
  // Extend mode field for extended-register operands (UXTB ... SXTX).
  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  // Left shift (0-4) applied together with an extend mode.
  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  // 5-bit immediate operand for conditional compare (CCMP/CCMN).
  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  // NZCV flags field for conditional compare: extract the four flag bits from
  // the StatusFlags value and place them in the nzcv field.
  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }

  // MemOperand offset encoding.

  // 12-bit unsigned, scaled offset for loads and stores.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  // 9-bit signed, unscaled offset for loads and stores.
  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  // 7-bit signed offset for load/store pair, scaled down by the access size.
  static Instr ImmLSPair(int64_t imm7, unsigned access_size) {
    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size));
    // Division (not a right shift) is used so that the scaling is exact for
    // negative offsets too; the assertion above guarantees no remainder.
    int64_t scaled_imm7 = imm7 / (1 << access_size);
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

  // One-bit shift flag for register-offset loads and stores.
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  // 5-bit prefetch operation field (PRFM).
  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }

  // 16-bit immediate for exception-generating instructions (SVC, BRK, ...).
  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  // 16-bit system register identifier (MRS/MSR).
  static Instr ImmSystemRegister(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmSystemRegister_offset;
  }

  // 7-bit immediate for hint instructions (NOP, YIELD, ...).
  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  // 4-bit CRm field for system instructions.
  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  // 4-bit CRn field for system instructions.
  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  // 14-bit combined operation field for SYS.
  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  // 3-bit op1 field for system instructions.
  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  // 3-bit op2 field for system instructions.
  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  // 2-bit barrier domain field (DMB/DSB).
  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  // 2-bit barrier type field (DMB/DSB).
  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.

  // 16-bit immediate for MOVZ/MOVN/MOVK.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  // hw field (shift selector, in units of 16 bits) for MOVZ/MOVN/MOVK.
  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }

  // FP Immediates.
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);

  // FP register type.
  static Instr FPType(FPRegister fd) { return fd.Is64Bits() ? FP64 : FP32; }

  // Scale field (6 bits) for fixed-point <-> floating-point conversions.
  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(IsUint6(scale));
    return scale << FPScale_offset;
  }

  // Immediate field checking helpers.
2876 static bool IsImmAddSub(int64_t immediate); 2877 static bool IsImmConditionalCompare(int64_t immediate); 2878 static bool IsImmFP32(float imm); 2879 static bool IsImmFP64(double imm); 2880 static bool IsImmLogical(uint64_t value, 2881 unsigned width, 2882 unsigned* n = NULL, 2883 unsigned* imm_s = NULL, 2884 unsigned* imm_r = NULL); 2885 static bool IsImmLSPair(int64_t offset, unsigned access_size); 2886 static bool IsImmLSScaled(int64_t offset, unsigned access_size); 2887 static bool IsImmLSUnscaled(int64_t offset); 2888 static bool IsImmMovn(uint64_t imm, unsigned reg_size); 2889 static bool IsImmMovz(uint64_t imm, unsigned reg_size); 2890 2891 // Instruction bits for vector format in data processing operations. 2892 static Instr VFormat(VRegister vd) { 2893 if (vd.Is64Bits()) { 2894 switch (vd.GetLanes()) { 2895 case 2: 2896 return NEON_2S; 2897 case 4: 2898 return NEON_4H; 2899 case 8: 2900 return NEON_8B; 2901 default: 2902 return 0xffffffff; 2903 } 2904 } else { 2905 VIXL_ASSERT(vd.Is128Bits()); 2906 switch (vd.GetLanes()) { 2907 case 2: 2908 return NEON_2D; 2909 case 4: 2910 return NEON_4S; 2911 case 8: 2912 return NEON_8H; 2913 case 16: 2914 return NEON_16B; 2915 default: 2916 return 0xffffffff; 2917 } 2918 } 2919 } 2920 2921 // Instruction bits for vector format in floating point data processing 2922 // operations. 2923 static Instr FPFormat(VRegister vd) { 2924 if (vd.GetLanes() == 1) { 2925 // Floating point scalar formats. 2926 VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits()); 2927 return vd.Is64Bits() ? FP64 : FP32; 2928 } 2929 2930 // Two lane floating point vector formats. 2931 if (vd.GetLanes() == 2) { 2932 VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits()); 2933 return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S; 2934 } 2935 2936 // Four lane floating point vector format. 2937 VIXL_ASSERT((vd.GetLanes() == 4) && vd.Is128Bits()); 2938 return NEON_FP_4S; 2939 } 2940 2941 // Instruction bits for vector format in load and store operations. 
2942 static Instr LSVFormat(VRegister vd) { 2943 if (vd.Is64Bits()) { 2944 switch (vd.GetLanes()) { 2945 case 1: 2946 return LS_NEON_1D; 2947 case 2: 2948 return LS_NEON_2S; 2949 case 4: 2950 return LS_NEON_4H; 2951 case 8: 2952 return LS_NEON_8B; 2953 default: 2954 return 0xffffffff; 2955 } 2956 } else { 2957 VIXL_ASSERT(vd.Is128Bits()); 2958 switch (vd.GetLanes()) { 2959 case 2: 2960 return LS_NEON_2D; 2961 case 4: 2962 return LS_NEON_4S; 2963 case 8: 2964 return LS_NEON_8H; 2965 case 16: 2966 return LS_NEON_16B; 2967 default: 2968 return 0xffffffff; 2969 } 2970 } 2971 } 2972 2973 // Instruction bits for scalar format in data processing operations. 2974 static Instr SFormat(VRegister vd) { 2975 VIXL_ASSERT(vd.GetLanes() == 1); 2976 switch (vd.GetSizeInBytes()) { 2977 case 1: 2978 return NEON_B; 2979 case 2: 2980 return NEON_H; 2981 case 4: 2982 return NEON_S; 2983 case 8: 2984 return NEON_D; 2985 default: 2986 return 0xffffffff; 2987 } 2988 } 2989 2990 static Instr ImmNEONHLM(int index, int num_bits) { 2991 int h, l, m; 2992 if (num_bits == 3) { 2993 VIXL_ASSERT(IsUint3(index)); 2994 h = (index >> 2) & 1; 2995 l = (index >> 1) & 1; 2996 m = (index >> 0) & 1; 2997 } else if (num_bits == 2) { 2998 VIXL_ASSERT(IsUint2(index)); 2999 h = (index >> 1) & 1; 3000 l = (index >> 0) & 1; 3001 m = 0; 3002 } else { 3003 VIXL_ASSERT(IsUint1(index) && (num_bits == 1)); 3004 h = (index >> 0) & 1; 3005 l = 0; 3006 m = 0; 3007 } 3008 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); 3009 } 3010 3011 static Instr ImmNEONExt(int imm4) { 3012 VIXL_ASSERT(IsUint4(imm4)); 3013 return imm4 << ImmNEONExt_offset; 3014 } 3015 3016 static Instr ImmNEON5(Instr format, int index) { 3017 VIXL_ASSERT(IsUint4(index)); 3018 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 3019 int imm5 = (index << (s + 1)) | (1 << s); 3020 return imm5 << ImmNEON5_offset; 3021 } 3022 3023 static Instr ImmNEON4(Instr format, int index) { 3024 
VIXL_ASSERT(IsUint4(index)); 3025 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 3026 int imm4 = index << s; 3027 return imm4 << ImmNEON4_offset; 3028 } 3029 3030 static Instr ImmNEONabcdefgh(int imm8) { 3031 VIXL_ASSERT(IsUint8(imm8)); 3032 Instr instr; 3033 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; 3034 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; 3035 return instr; 3036 } 3037 3038 static Instr NEONCmode(int cmode) { 3039 VIXL_ASSERT(IsUint4(cmode)); 3040 return cmode << NEONCmode_offset; 3041 } 3042 3043 static Instr NEONModImmOp(int op) { 3044 VIXL_ASSERT(IsUint1(op)); 3045 return op << NEONModImmOp_offset; 3046 } 3047 3048 // Size of the code generated since label to the current position. 3049 size_t GetSizeOfCodeGeneratedSince(Label* label) const { 3050 VIXL_ASSERT(label->IsBound()); 3051 return GetBuffer().GetOffsetFrom(label->GetLocation()); 3052 } 3053 VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince", 3054 size_t SizeOfCodeGeneratedSince(Label* label) const) { 3055 return GetSizeOfCodeGeneratedSince(label); 3056 } 3057 3058 VIXL_DEPRECATED("GetBuffer().GetCapacity()", 3059 size_t GetBufferCapacity() const) { 3060 return GetBuffer().GetCapacity(); 3061 } 3062 VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) { 3063 return GetBuffer().GetCapacity(); 3064 } 3065 3066 VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()", 3067 size_t GetRemainingBufferSpace() const) { 3068 return GetBuffer().GetRemainingBytes(); 3069 } 3070 VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()", 3071 size_t RemainingBufferSpace() const) { 3072 return GetBuffer().GetRemainingBytes(); 3073 } 3074 3075 PositionIndependentCodeOption GetPic() const { return pic_; } 3076 VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) { 3077 return GetPic(); 3078 } 3079 3080 bool AllowPageOffsetDependentCode() const { 3081 return (GetPic() == PageOffsetDependentCode) || 3082 (GetPic() == PositionDependentCode); 3083 } 3084 3085 
  // Returns the zero register (xzr or wzr) matching the size of `reg`.
  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }

 protected:
  // Core load/store emission helper; `option` selects whether the scaled
  // (unsigned) or unscaled (signed) offset form is preferred.
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  // Load/store pair emission helper.
  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  // NEON multi-structure load/store helpers.
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  // NEON single-structure (per-lane) load/store helpers.
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  // NOTE(review): presumably validates vt/addr against op before emission --
  // confirm against the definition.
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  // Prefetch emission helper.
  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  // Conditional compare emission helper (CCMP/CCMN).
  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  // Add/subtract with carry emission helper (ADC/SBC and flag-setting forms).
  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);


  // Functions for emulating operands not directly supported by the instruction
  // set.
  // Emit code applying `shift` by `amount` to rn, writing the result to rd.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  // Emit code applying `extend` (plus an optional left shift) to rn, writing
  // the result to rd.
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Add/subtract emission helper.
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // NEON table lookup emission helper (TBL/TBX).
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);


 private:
  // Encode a floating-point value as an 8-bit FP immediate field.
  // NOTE(review): the imm8 encoding details live in the definitions -- these
  // are declarations only.
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  // Move-wide immediate emission helper (MOVZ/MOVN/MOVK).
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  // Data-processing emission helpers for shifted- and extended-register
  // operand forms.
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON emission helpers, grouped by instruction shape (across-lanes,
  // modified-immediate, same/different-size three-register, two-register
  // miscellaneous, permute, by-element, and shift-immediate forms).
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  // Encode the addressing-mode field for NEON structure loads and stores.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offset are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);

  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    // Emission is only legal inside a scope that allows direct assembly.
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }

  // Position-independence policy; see GetPic().
  PositionIndependentCodeOption pic_;
};


// Patch a literal's value through the assembler's buffer start address.
template <typename T>
void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
  return UpdateValue(new_value,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


// 128-bit variant: patch the high and low 64-bit halves of a literal.
template <typename T>
void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
  return UpdateValue(high64,
                     low64,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


}  // namespace aarch64

// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file. Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
// A label link is a raw buffer offset, so each element acts as its own key.
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS

}  // namespace vixl

#endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_