// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
#define VIXL_AARCH64_ASSEMBLER_AARCH64_H_

#include "../assembler-base-vixl.h"
#include "../code-generation-scopes-vixl.h"
#include "../globals-vixl.h"
#include "../invalset-vixl.h"
#include "../utils-vixl.h"

#include "operands-aarch64.h"

namespace vixl {
namespace aarch64 {

class LabelTestHelper;  // Forward declaration.


// A position in the code buffer. A Label is either unbound (location_ ==
// kLocationUnbound) or bound to a non-negative buffer offset. While unbound,
// it records the offsets of all instructions that branch to it (links_), so
// they can be patched once the label is bound.
class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  bool IsBound() const { return location_ >= 0; }
  bool IsLinked() const { return !links_.empty(); }

  // Offset of the bound location from the start of the buffer. Returns
  // kLocationUnbound (-1) if the label has not been bound yet.
  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  // InvalSet tuning parameters for the link container: a small inline
  // preallocation for the common few-branches case, and reclaim thresholds
  // for invalidated (deleted) entries.
  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor> LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, ie. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

  // It is not safe to copy labels, so disable the copy constructor and operator
  // by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};


class Assembler;
class LiteralPool;

// A literal is a 32-bit or 64-bit piece of data stored in the instruction
// stream and loaded through a pc relative load. The same literal can be
// referred to by multiple instructions but a literal can only reside at one
// place in memory. A literal can be used by a load before or after being
// placed in memory.
//
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
// 1) the label is placed, the offset (stored as offset + 1) is used to
//    resolve any subsequent load using the label.
// 2) the label is not placed and offset is the offset of the last load using
//    the literal (stored as -offset -1).
If multiple loads refer to this 175 // literal then the last load holds the offset of the preceding load and 176 // all loads form a chain. Once the offset is placed all the loads in the 177 // chain are resolved and future loads fall back to possibility 1. 178 class RawLiteral { 179 public: 180 enum DeletionPolicy { 181 kDeletedOnPlacementByPool, 182 kDeletedOnPoolDestruction, 183 kManuallyDeleted 184 }; 185 186 RawLiteral(size_t size, 187 LiteralPool* literal_pool, 188 DeletionPolicy deletion_policy = kManuallyDeleted); 189 190 // The literal pool only sees and deletes `RawLiteral*` pointers, but they are 191 // actually pointing to `Literal<T>` objects. 192 virtual ~RawLiteral() {} 193 194 size_t GetSize() const { 195 VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes); 196 VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes); 197 VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) || 198 (size_ == kQRegSizeInBytes)); 199 return size_; 200 } 201 VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); } 202 203 uint64_t GetRawValue128Low64() const { 204 VIXL_ASSERT(size_ == kQRegSizeInBytes); 205 return low64_; 206 } 207 VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) { 208 return GetRawValue128Low64(); 209 } 210 211 uint64_t GetRawValue128High64() const { 212 VIXL_ASSERT(size_ == kQRegSizeInBytes); 213 return high64_; 214 } 215 VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) { 216 return GetRawValue128High64(); 217 } 218 219 uint64_t GetRawValue64() const { 220 VIXL_ASSERT(size_ == kXRegSizeInBytes); 221 VIXL_ASSERT(high64_ == 0); 222 return low64_; 223 } 224 VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) { 225 return GetRawValue64(); 226 } 227 228 uint32_t GetRawValue32() const { 229 VIXL_ASSERT(size_ == kWRegSizeInBytes); 230 VIXL_ASSERT(high64_ == 0); 231 VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_)); 232 return static_cast<uint32_t>(low64_); 233 } 234 
VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) { 235 return GetRawValue32(); 236 } 237 238 bool IsUsed() const { return offset_ < 0; } 239 bool IsPlaced() const { return offset_ > 0; } 240 241 LiteralPool* GetLiteralPool() const { return literal_pool_; } 242 243 ptrdiff_t GetOffset() const { 244 VIXL_ASSERT(IsPlaced()); 245 return offset_ - 1; 246 } 247 VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); } 248 249 protected: 250 void SetOffset(ptrdiff_t offset) { 251 VIXL_ASSERT(offset >= 0); 252 VIXL_ASSERT(IsWordAligned(offset)); 253 VIXL_ASSERT(!IsPlaced()); 254 offset_ = offset + 1; 255 } 256 VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) { 257 SetOffset(offset); 258 } 259 260 ptrdiff_t GetLastUse() const { 261 VIXL_ASSERT(IsUsed()); 262 return -offset_ - 1; 263 } 264 VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); } 265 266 void SetLastUse(ptrdiff_t offset) { 267 VIXL_ASSERT(offset >= 0); 268 VIXL_ASSERT(IsWordAligned(offset)); 269 VIXL_ASSERT(!IsPlaced()); 270 offset_ = -offset - 1; 271 } 272 VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) { 273 SetLastUse(offset); 274 } 275 276 size_t size_; 277 ptrdiff_t offset_; 278 uint64_t low64_; 279 uint64_t high64_; 280 281 private: 282 LiteralPool* literal_pool_; 283 DeletionPolicy deletion_policy_; 284 285 friend class Assembler; 286 friend class LiteralPool; 287 }; 288 289 290 template <typename T> 291 class Literal : public RawLiteral { 292 public: 293 explicit Literal(T value, 294 LiteralPool* literal_pool = NULL, 295 RawLiteral::DeletionPolicy ownership = kManuallyDeleted) 296 : RawLiteral(sizeof(value), literal_pool, ownership) { 297 VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes); 298 UpdateValue(value); 299 } 300 301 Literal(T high64, 302 T low64, 303 LiteralPool* literal_pool = NULL, 304 RawLiteral::DeletionPolicy ownership = kManuallyDeleted) 305 : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) { 306 
VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2)); 307 UpdateValue(high64, low64); 308 } 309 310 virtual ~Literal() {} 311 312 // Update the value of this literal, if necessary by rewriting the value in 313 // the pool. 314 // If the literal has already been placed in a literal pool, the address of 315 // the start of the code buffer must be provided, as the literal only knows it 316 // offset from there. This also allows patching the value after the code has 317 // been moved in memory. 318 void UpdateValue(T new_value, uint8_t* code_buffer = NULL) { 319 VIXL_ASSERT(sizeof(new_value) == size_); 320 memcpy(&low64_, &new_value, sizeof(new_value)); 321 if (IsPlaced()) { 322 VIXL_ASSERT(code_buffer != NULL); 323 RewriteValueInCode(code_buffer); 324 } 325 } 326 327 void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) { 328 VIXL_ASSERT(sizeof(low64) == size_ / 2); 329 memcpy(&low64_, &low64, sizeof(low64)); 330 memcpy(&high64_, &high64, sizeof(high64)); 331 if (IsPlaced()) { 332 VIXL_ASSERT(code_buffer != NULL); 333 RewriteValueInCode(code_buffer); 334 } 335 } 336 337 void UpdateValue(T new_value, const Assembler* assembler); 338 void UpdateValue(T high64, T low64, const Assembler* assembler); 339 340 private: 341 void RewriteValueInCode(uint8_t* code_buffer) { 342 VIXL_ASSERT(IsPlaced()); 343 VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes); 344 switch (GetSize()) { 345 case kSRegSizeInBytes: 346 *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) = 347 GetRawValue32(); 348 break; 349 case kDRegSizeInBytes: 350 *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) = 351 GetRawValue64(); 352 break; 353 default: 354 VIXL_ASSERT(GetSize() == kQRegSizeInBytes); 355 uint64_t* base_address = 356 reinterpret_cast<uint64_t*>(code_buffer + GetOffset()); 357 *base_address = GetRawValue128Low64(); 358 *(base_address + 1) = GetRawValue128High64(); 359 } 360 } 361 }; 362 363 364 // Control whether or not position-independent code should be emitted. 
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};


// Control how scaled- and unscaled-offset loads and stores are generated.
// The "Prefer" options fall back to another addressing form when the offset
// cannot be encoded as requested; the "Require" options do not.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};


// Assembler.
403 class Assembler : public vixl::internal::AssemblerBase { 404 public: 405 explicit Assembler( 406 PositionIndependentCodeOption pic = PositionIndependentCode) 407 : pic_(pic) {} 408 explicit Assembler( 409 size_t capacity, 410 PositionIndependentCodeOption pic = PositionIndependentCode) 411 : AssemblerBase(capacity), pic_(pic) {} 412 Assembler(byte* buffer, 413 size_t capacity, 414 PositionIndependentCodeOption pic = PositionIndependentCode) 415 : AssemblerBase(buffer, capacity), pic_(pic) {} 416 417 // Upon destruction, the code will assert that one of the following is true: 418 // * The Assembler object has not been used. 419 // * Nothing has been emitted since the last Reset() call. 420 // * Nothing has been emitted since the last FinalizeCode() call. 421 ~Assembler() {} 422 423 // System functions. 424 425 // Start generating code from the beginning of the buffer, discarding any code 426 // and data that has already been emitted into the buffer. 427 void Reset(); 428 429 // Label. 430 // Bind a label to the current PC. 431 void bind(Label* label); 432 433 // Bind a label to a specified offset from the start of the buffer. 434 void BindToOffset(Label* label, ptrdiff_t offset); 435 436 // Place a literal at the current PC. 437 void place(RawLiteral* literal); 438 439 VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) { 440 return GetCursorOffset(); 441 } 442 443 VIXL_DEPRECATED("GetBuffer().GetCapacity()", 444 ptrdiff_t GetBufferEndOffset() const) { 445 return static_cast<ptrdiff_t>(GetBuffer().GetCapacity()); 446 } 447 VIXL_DEPRECATED("GetBuffer().GetCapacity()", 448 ptrdiff_t BufferEndOffset() const) { 449 return GetBuffer().GetCapacity(); 450 } 451 452 // Return the address of a bound label. 
453 template <typename T> 454 T GetLabelAddress(const Label* label) const { 455 VIXL_ASSERT(label->IsBound()); 456 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); 457 return GetBuffer().GetOffsetAddress<T>(label->GetLocation()); 458 } 459 460 Instruction* GetInstructionAt(ptrdiff_t instruction_offset) { 461 return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset); 462 } 463 VIXL_DEPRECATED("GetInstructionAt", 464 Instruction* InstructionAt(ptrdiff_t instruction_offset)) { 465 return GetInstructionAt(instruction_offset); 466 } 467 468 ptrdiff_t GetInstructionOffset(Instruction* instruction) { 469 VIXL_STATIC_ASSERT(sizeof(*instruction) == 1); 470 ptrdiff_t offset = 471 instruction - GetBuffer()->GetStartAddress<Instruction*>(); 472 VIXL_ASSERT((0 <= offset) && 473 (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity()))); 474 return offset; 475 } 476 VIXL_DEPRECATED("GetInstructionOffset", 477 ptrdiff_t InstructionOffset(Instruction* instruction)) { 478 return GetInstructionOffset(instruction); 479 } 480 481 // Instruction set functions. 482 483 // Branch / Jump instructions. 484 // Branch to register. 485 void br(const Register& xn); 486 487 // Branch with link to register. 488 void blr(const Register& xn); 489 490 // Branch to register with return hint. 491 void ret(const Register& xn = lr); 492 493 // Unconditional branch to label. 494 void b(Label* label); 495 496 // Conditional branch to label. 497 void b(Label* label, Condition cond); 498 499 // Unconditional branch to PC offset. 500 void b(int64_t imm26); 501 502 // Conditional branch to PC offset. 503 void b(int64_t imm19, Condition cond); 504 505 // Branch with link to label. 506 void bl(Label* label); 507 508 // Branch with link to PC offset. 509 void bl(int64_t imm26); 510 511 // Compare and branch to label if zero. 512 void cbz(const Register& rt, Label* label); 513 514 // Compare and branch to PC offset if zero. 
515 void cbz(const Register& rt, int64_t imm19); 516 517 // Compare and branch to label if not zero. 518 void cbnz(const Register& rt, Label* label); 519 520 // Compare and branch to PC offset if not zero. 521 void cbnz(const Register& rt, int64_t imm19); 522 523 // Table lookup from one register. 524 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 525 526 // Table lookup from two registers. 527 void tbl(const VRegister& vd, 528 const VRegister& vn, 529 const VRegister& vn2, 530 const VRegister& vm); 531 532 // Table lookup from three registers. 533 void tbl(const VRegister& vd, 534 const VRegister& vn, 535 const VRegister& vn2, 536 const VRegister& vn3, 537 const VRegister& vm); 538 539 // Table lookup from four registers. 540 void tbl(const VRegister& vd, 541 const VRegister& vn, 542 const VRegister& vn2, 543 const VRegister& vn3, 544 const VRegister& vn4, 545 const VRegister& vm); 546 547 // Table lookup extension from one register. 548 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm); 549 550 // Table lookup extension from two registers. 551 void tbx(const VRegister& vd, 552 const VRegister& vn, 553 const VRegister& vn2, 554 const VRegister& vm); 555 556 // Table lookup extension from three registers. 557 void tbx(const VRegister& vd, 558 const VRegister& vn, 559 const VRegister& vn2, 560 const VRegister& vn3, 561 const VRegister& vm); 562 563 // Table lookup extension from four registers. 564 void tbx(const VRegister& vd, 565 const VRegister& vn, 566 const VRegister& vn2, 567 const VRegister& vn3, 568 const VRegister& vn4, 569 const VRegister& vm); 570 571 // Test bit and branch to label if zero. 572 void tbz(const Register& rt, unsigned bit_pos, Label* label); 573 574 // Test bit and branch to PC offset if zero. 575 void tbz(const Register& rt, unsigned bit_pos, int64_t imm14); 576 577 // Test bit and branch to label if not zero. 
578 void tbnz(const Register& rt, unsigned bit_pos, Label* label); 579 580 // Test bit and branch to PC offset if not zero. 581 void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14); 582 583 // Address calculation instructions. 584 // Calculate a PC-relative address. Unlike for branches the offset in adr is 585 // unscaled (i.e. the result can be unaligned). 586 587 // Calculate the address of a label. 588 void adr(const Register& xd, Label* label); 589 590 // Calculate the address of a PC offset. 591 void adr(const Register& xd, int64_t imm21); 592 593 // Calculate the page address of a label. 594 void adrp(const Register& xd, Label* label); 595 596 // Calculate the page address of a PC offset. 597 void adrp(const Register& xd, int64_t imm21); 598 599 // Data Processing instructions. 600 // Add. 601 void add(const Register& rd, const Register& rn, const Operand& operand); 602 603 // Add and update status flags. 604 void adds(const Register& rd, const Register& rn, const Operand& operand); 605 606 // Compare negative. 607 void cmn(const Register& rn, const Operand& operand); 608 609 // Subtract. 610 void sub(const Register& rd, const Register& rn, const Operand& operand); 611 612 // Subtract and update status flags. 613 void subs(const Register& rd, const Register& rn, const Operand& operand); 614 615 // Compare. 616 void cmp(const Register& rn, const Operand& operand); 617 618 // Negate. 619 void neg(const Register& rd, const Operand& operand); 620 621 // Negate and update status flags. 622 void negs(const Register& rd, const Operand& operand); 623 624 // Add with carry bit. 625 void adc(const Register& rd, const Register& rn, const Operand& operand); 626 627 // Add with carry bit and update status flags. 628 void adcs(const Register& rd, const Register& rn, const Operand& operand); 629 630 // Subtract with carry bit. 631 void sbc(const Register& rd, const Register& rn, const Operand& operand); 632 633 // Subtract with carry bit and update status flags. 
634 void sbcs(const Register& rd, const Register& rn, const Operand& operand); 635 636 // Negate with carry bit. 637 void ngc(const Register& rd, const Operand& operand); 638 639 // Negate with carry bit and update status flags. 640 void ngcs(const Register& rd, const Operand& operand); 641 642 // Logical instructions. 643 // Bitwise and (A & B). 644 void and_(const Register& rd, const Register& rn, const Operand& operand); 645 646 // Bitwise and (A & B) and update status flags. 647 void ands(const Register& rd, const Register& rn, const Operand& operand); 648 649 // Bit test and set flags. 650 void tst(const Register& rn, const Operand& operand); 651 652 // Bit clear (A & ~B). 653 void bic(const Register& rd, const Register& rn, const Operand& operand); 654 655 // Bit clear (A & ~B) and update status flags. 656 void bics(const Register& rd, const Register& rn, const Operand& operand); 657 658 // Bitwise or (A | B). 659 void orr(const Register& rd, const Register& rn, const Operand& operand); 660 661 // Bitwise nor (A | ~B). 662 void orn(const Register& rd, const Register& rn, const Operand& operand); 663 664 // Bitwise eor/xor (A ^ B). 665 void eor(const Register& rd, const Register& rn, const Operand& operand); 666 667 // Bitwise enor/xnor (A ^ ~B). 668 void eon(const Register& rd, const Register& rn, const Operand& operand); 669 670 // Logical shift left by variable. 671 void lslv(const Register& rd, const Register& rn, const Register& rm); 672 673 // Logical shift right by variable. 674 void lsrv(const Register& rd, const Register& rn, const Register& rm); 675 676 // Arithmetic shift right by variable. 677 void asrv(const Register& rd, const Register& rn, const Register& rm); 678 679 // Rotate right by variable. 680 void rorv(const Register& rd, const Register& rn, const Register& rm); 681 682 // Bitfield instructions. 683 // Bitfield move. 
684 void bfm(const Register& rd, 685 const Register& rn, 686 unsigned immr, 687 unsigned imms); 688 689 // Signed bitfield move. 690 void sbfm(const Register& rd, 691 const Register& rn, 692 unsigned immr, 693 unsigned imms); 694 695 // Unsigned bitfield move. 696 void ubfm(const Register& rd, 697 const Register& rn, 698 unsigned immr, 699 unsigned imms); 700 701 // Bfm aliases. 702 // Bitfield insert. 703 void bfi(const Register& rd, 704 const Register& rn, 705 unsigned lsb, 706 unsigned width) { 707 VIXL_ASSERT(width >= 1); 708 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits())); 709 bfm(rd, 710 rn, 711 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1), 712 width - 1); 713 } 714 715 // Bitfield extract and insert low. 716 void bfxil(const Register& rd, 717 const Register& rn, 718 unsigned lsb, 719 unsigned width) { 720 VIXL_ASSERT(width >= 1); 721 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits())); 722 bfm(rd, rn, lsb, lsb + width - 1); 723 } 724 725 // Sbfm aliases. 726 // Arithmetic shift right. 727 void asr(const Register& rd, const Register& rn, unsigned shift) { 728 VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits())); 729 sbfm(rd, rn, shift, rd.GetSizeInBits() - 1); 730 } 731 732 // Signed bitfield insert with zero at right. 733 void sbfiz(const Register& rd, 734 const Register& rn, 735 unsigned lsb, 736 unsigned width) { 737 VIXL_ASSERT(width >= 1); 738 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits())); 739 sbfm(rd, 740 rn, 741 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1), 742 width - 1); 743 } 744 745 // Signed bitfield extract. 746 void sbfx(const Register& rd, 747 const Register& rn, 748 unsigned lsb, 749 unsigned width) { 750 VIXL_ASSERT(width >= 1); 751 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits())); 752 sbfm(rd, rn, lsb, lsb + width - 1); 753 } 754 755 // Signed extend byte. 
756 void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); } 757 758 // Signed extend halfword. 759 void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); } 760 761 // Signed extend word. 762 void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); } 763 764 // Ubfm aliases. 765 // Logical shift left. 766 void lsl(const Register& rd, const Register& rn, unsigned shift) { 767 unsigned reg_size = rd.GetSizeInBits(); 768 VIXL_ASSERT(shift < reg_size); 769 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1); 770 } 771 772 // Logical shift right. 773 void lsr(const Register& rd, const Register& rn, unsigned shift) { 774 VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits())); 775 ubfm(rd, rn, shift, rd.GetSizeInBits() - 1); 776 } 777 778 // Unsigned bitfield insert with zero at right. 779 void ubfiz(const Register& rd, 780 const Register& rn, 781 unsigned lsb, 782 unsigned width) { 783 VIXL_ASSERT(width >= 1); 784 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits())); 785 ubfm(rd, 786 rn, 787 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1), 788 width - 1); 789 } 790 791 // Unsigned bitfield extract. 792 void ubfx(const Register& rd, 793 const Register& rn, 794 unsigned lsb, 795 unsigned width) { 796 VIXL_ASSERT(width >= 1); 797 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits())); 798 ubfm(rd, rn, lsb, lsb + width - 1); 799 } 800 801 // Unsigned extend byte. 802 void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); } 803 804 // Unsigned extend halfword. 805 void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); } 806 807 // Unsigned extend word. 808 void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); } 809 810 // Extract. 811 void extr(const Register& rd, 812 const Register& rn, 813 const Register& rm, 814 unsigned lsb); 815 816 // Conditional select: rd = cond ? rn : rm. 
817 void csel(const Register& rd, 818 const Register& rn, 819 const Register& rm, 820 Condition cond); 821 822 // Conditional select increment: rd = cond ? rn : rm + 1. 823 void csinc(const Register& rd, 824 const Register& rn, 825 const Register& rm, 826 Condition cond); 827 828 // Conditional select inversion: rd = cond ? rn : ~rm. 829 void csinv(const Register& rd, 830 const Register& rn, 831 const Register& rm, 832 Condition cond); 833 834 // Conditional select negation: rd = cond ? rn : -rm. 835 void csneg(const Register& rd, 836 const Register& rn, 837 const Register& rm, 838 Condition cond); 839 840 // Conditional set: rd = cond ? 1 : 0. 841 void cset(const Register& rd, Condition cond); 842 843 // Conditional set mask: rd = cond ? -1 : 0. 844 void csetm(const Register& rd, Condition cond); 845 846 // Conditional increment: rd = cond ? rn + 1 : rn. 847 void cinc(const Register& rd, const Register& rn, Condition cond); 848 849 // Conditional invert: rd = cond ? ~rn : rn. 850 void cinv(const Register& rd, const Register& rn, Condition cond); 851 852 // Conditional negate: rd = cond ? -rn : rn. 853 void cneg(const Register& rd, const Register& rn, Condition cond); 854 855 // Rotate right. 856 void ror(const Register& rd, const Register& rs, unsigned shift) { 857 extr(rd, rs, rs, shift); 858 } 859 860 // Conditional comparison. 861 // Conditional compare negative. 862 void ccmn(const Register& rn, 863 const Operand& operand, 864 StatusFlags nzcv, 865 Condition cond); 866 867 // Conditional compare. 868 void ccmp(const Register& rn, 869 const Operand& operand, 870 StatusFlags nzcv, 871 Condition cond); 872 873 // CRC-32 checksum from byte. 874 void crc32b(const Register& wd, const Register& wn, const Register& wm); 875 876 // CRC-32 checksum from half-word. 877 void crc32h(const Register& wd, const Register& wn, const Register& wm); 878 879 // CRC-32 checksum from word. 
880 void crc32w(const Register& wd, const Register& wn, const Register& wm); 881 882 // CRC-32 checksum from double word. 883 void crc32x(const Register& wd, const Register& wn, const Register& xm); 884 885 // CRC-32 C checksum from byte. 886 void crc32cb(const Register& wd, const Register& wn, const Register& wm); 887 888 // CRC-32 C checksum from half-word. 889 void crc32ch(const Register& wd, const Register& wn, const Register& wm); 890 891 // CRC-32 C checksum from word. 892 void crc32cw(const Register& wd, const Register& wn, const Register& wm); 893 894 // CRC-32C checksum from double word. 895 void crc32cx(const Register& wd, const Register& wn, const Register& xm); 896 897 // Multiply. 898 void mul(const Register& rd, const Register& rn, const Register& rm); 899 900 // Negated multiply. 901 void mneg(const Register& rd, const Register& rn, const Register& rm); 902 903 // Signed long multiply: 32 x 32 -> 64-bit. 904 void smull(const Register& xd, const Register& wn, const Register& wm); 905 906 // Signed multiply high: 64 x 64 -> 64-bit <127:64>. 907 void smulh(const Register& xd, const Register& xn, const Register& xm); 908 909 // Multiply and accumulate. 910 void madd(const Register& rd, 911 const Register& rn, 912 const Register& rm, 913 const Register& ra); 914 915 // Multiply and subtract. 916 void msub(const Register& rd, 917 const Register& rn, 918 const Register& rm, 919 const Register& ra); 920 921 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit. 922 void smaddl(const Register& xd, 923 const Register& wn, 924 const Register& wm, 925 const Register& xa); 926 927 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit. 928 void umaddl(const Register& xd, 929 const Register& wn, 930 const Register& wm, 931 const Register& xa); 932 933 // Unsigned long multiply: 32 x 32 -> 64-bit. 
934 void umull(const Register& xd, const Register& wn, const Register& wm) { 935 umaddl(xd, wn, wm, xzr); 936 } 937 938 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>. 939 void umulh(const Register& xd, const Register& xn, const Register& xm); 940 941 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit. 942 void smsubl(const Register& xd, 943 const Register& wn, 944 const Register& wm, 945 const Register& xa); 946 947 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit. 948 void umsubl(const Register& xd, 949 const Register& wn, 950 const Register& wm, 951 const Register& xa); 952 953 // Signed integer divide. 954 void sdiv(const Register& rd, const Register& rn, const Register& rm); 955 956 // Unsigned integer divide. 957 void udiv(const Register& rd, const Register& rn, const Register& rm); 958 959 // Bit reverse. 960 void rbit(const Register& rd, const Register& rn); 961 962 // Reverse bytes in 16-bit half words. 963 void rev16(const Register& rd, const Register& rn); 964 965 // Reverse bytes in 32-bit words. 966 void rev32(const Register& xd, const Register& xn); 967 968 // Reverse bytes. 969 void rev(const Register& rd, const Register& rn); 970 971 // Count leading zeroes. 972 void clz(const Register& rd, const Register& rn); 973 974 // Count leading sign bits. 975 void cls(const Register& rd, const Register& rn); 976 977 // Memory instructions. 978 // Load integer or FP register. 979 void ldr(const CPURegister& rt, 980 const MemOperand& src, 981 LoadStoreScalingOption option = PreferScaledOffset); 982 983 // Store integer or FP register. 984 void str(const CPURegister& rt, 985 const MemOperand& dst, 986 LoadStoreScalingOption option = PreferScaledOffset); 987 988 // Load word with sign extension. 989 void ldrsw(const Register& xt, 990 const MemOperand& src, 991 LoadStoreScalingOption option = PreferScaledOffset); 992 993 // Load byte. 
994 void ldrb(const Register& rt, 995 const MemOperand& src, 996 LoadStoreScalingOption option = PreferScaledOffset); 997 998 // Store byte. 999 void strb(const Register& rt, 1000 const MemOperand& dst, 1001 LoadStoreScalingOption option = PreferScaledOffset); 1002 1003 // Load byte with sign extension. 1004 void ldrsb(const Register& rt, 1005 const MemOperand& src, 1006 LoadStoreScalingOption option = PreferScaledOffset); 1007 1008 // Load half-word. 1009 void ldrh(const Register& rt, 1010 const MemOperand& src, 1011 LoadStoreScalingOption option = PreferScaledOffset); 1012 1013 // Store half-word. 1014 void strh(const Register& rt, 1015 const MemOperand& dst, 1016 LoadStoreScalingOption option = PreferScaledOffset); 1017 1018 // Load half-word with sign extension. 1019 void ldrsh(const Register& rt, 1020 const MemOperand& src, 1021 LoadStoreScalingOption option = PreferScaledOffset); 1022 1023 // Load integer or FP register (with unscaled offset). 1024 void ldur(const CPURegister& rt, 1025 const MemOperand& src, 1026 LoadStoreScalingOption option = PreferUnscaledOffset); 1027 1028 // Store integer or FP register (with unscaled offset). 1029 void stur(const CPURegister& rt, 1030 const MemOperand& src, 1031 LoadStoreScalingOption option = PreferUnscaledOffset); 1032 1033 // Load word with sign extension. 1034 void ldursw(const Register& xt, 1035 const MemOperand& src, 1036 LoadStoreScalingOption option = PreferUnscaledOffset); 1037 1038 // Load byte (with unscaled offset). 1039 void ldurb(const Register& rt, 1040 const MemOperand& src, 1041 LoadStoreScalingOption option = PreferUnscaledOffset); 1042 1043 // Store byte (with unscaled offset). 1044 void sturb(const Register& rt, 1045 const MemOperand& dst, 1046 LoadStoreScalingOption option = PreferUnscaledOffset); 1047 1048 // Load byte with sign extension (and unscaled offset). 
  void ldursb(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word (with unscaled offset).
  void ldurh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store half-word (with unscaled offset).
  void sturh(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word with sign extension (and unscaled offset).
  void ldursh(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);

  // Load integer or FP register pair, non-temporal.
  void ldnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& src);

  // Store integer or FP register pair, non-temporal.
  void stnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& dst);

  // Load integer or FP register from literal pool.
  void ldr(const CPURegister& rt, RawLiteral* literal);

  // Load word with sign extension from literal pool.
  void ldrsw(const Register& xt, RawLiteral* literal);

  // Load integer or FP register from pc + imm19 << 2.
  void ldr(const CPURegister& rt, int64_t imm19);

  // Load word with sign extension from pc + imm19 << 2.
  void ldrsw(const Register& xt, int64_t imm19);

  // Exclusive-access instructions (load/store exclusive and their
  // acquire/release variants).

  // Store exclusive byte.
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive half-word.
  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive register.
  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load exclusive byte.
  void ldxrb(const Register& rt, const MemOperand& src);

  // Load exclusive half-word.
  void ldxrh(const Register& rt, const MemOperand& src);

  // Load exclusive register.
  void ldxr(const Register& rt, const MemOperand& src);

  // Store exclusive register pair.
  void stxp(const Register& rs,
            const Register& rt,
            const Register& rt2,
            const MemOperand& dst);

  // Load exclusive register pair.
  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive register.
  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive register.
  void ldaxr(const Register& rt, const MemOperand& src);

  // Store-release exclusive register pair.
  void stlxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
             const MemOperand& dst);

  // Load-acquire exclusive register pair.
  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release byte.
  void stlrb(const Register& rt, const MemOperand& dst);

  // Store-release half-word.
  void stlrh(const Register& rt, const MemOperand& dst);

  // Store-release register.
  void stlr(const Register& rt, const MemOperand& dst);

  // Load-acquire byte.
  void ldarb(const Register& rt, const MemOperand& src);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const MemOperand& src);

  // Load-acquire register.
  void ldar(const Register& rt, const MemOperand& src);

  // Prefetch memory.
  void prfm(PrefetchOperation op,
            const MemOperand& addr,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Prefetch memory (with unscaled offset).
  void prfum(PrefetchOperation op,
             const MemOperand& addr,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Prefetch memory in the literal pool.
  void prfm(PrefetchOperation op, RawLiteral* literal);

  // Prefetch from pc + imm19 << 2.
  void prfm(PrefetchOperation op, int64_t imm19);

  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // that is equal to the 64-bit immediate argument. If an explicit left shift
  // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
  //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.

  // Move immediate and keep.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }

  // Move inverted immediate.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }

  // Move immediate.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }

  // Misc instructions.
  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Generate exception targeting EL1.
  void svc(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move inverted operand to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.
  // Move to register from system register.
  void mrs(const Register& xt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& xt);

  // System instruction.
  void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);

  // System instruction with pre-encoded op (op1:crn:crm:op2).
  void sys(int op, const Register& xt = xzr);

  // System data cache operation.
  void dc(DataCacheOp op, const Register& rt);

  // System instruction cache operation.
  void ic(InstructionCacheOp op, const Register& rt);

  // System hint.
  void hint(SystemHint code);

  // Clear exclusive monitor.
  void clrex(int imm4 = 0xf);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();

  // Alias for system instructions.
  // No-op. Emitted as the NOP system hint.
  void nop() { hint(NOP); }

  // FP and NEON instructions.
  // Move double precision immediate to FP register.
  void fmov(const VRegister& vd, double imm);

  // Move single precision immediate to FP register.
  void fmov(const VRegister& vd, float imm);

  // Move FP register to register.
1278 void fmov(const Register& rd, const VRegister& fn); 1279 1280 // Move register to FP register. 1281 void fmov(const VRegister& vd, const Register& rn); 1282 1283 // Move FP register to FP register. 1284 void fmov(const VRegister& vd, const VRegister& fn); 1285 1286 // Move 64-bit register to top half of 128-bit FP register. 1287 void fmov(const VRegister& vd, int index, const Register& rn); 1288 1289 // Move top half of 128-bit FP register to 64-bit register. 1290 void fmov(const Register& rd, const VRegister& vn, int index); 1291 1292 // FP add. 1293 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1294 1295 // FP subtract. 1296 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1297 1298 // FP multiply. 1299 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1300 1301 // FP fused multiply-add. 1302 void fmadd(const VRegister& vd, 1303 const VRegister& vn, 1304 const VRegister& vm, 1305 const VRegister& va); 1306 1307 // FP fused multiply-subtract. 1308 void fmsub(const VRegister& vd, 1309 const VRegister& vn, 1310 const VRegister& vm, 1311 const VRegister& va); 1312 1313 // FP fused multiply-add and negate. 1314 void fnmadd(const VRegister& vd, 1315 const VRegister& vn, 1316 const VRegister& vm, 1317 const VRegister& va); 1318 1319 // FP fused multiply-subtract and negate. 1320 void fnmsub(const VRegister& vd, 1321 const VRegister& vn, 1322 const VRegister& vm, 1323 const VRegister& va); 1324 1325 // FP multiply-negate scalar. 1326 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1327 1328 // FP reciprocal exponent scalar. 1329 void frecpx(const VRegister& vd, const VRegister& vn); 1330 1331 // FP divide. 1332 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1333 1334 // FP maximum. 1335 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1336 1337 // FP minimum. 
1338 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1339 1340 // FP maximum number. 1341 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1342 1343 // FP minimum number. 1344 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm); 1345 1346 // FP absolute. 1347 void fabs(const VRegister& vd, const VRegister& vn); 1348 1349 // FP negate. 1350 void fneg(const VRegister& vd, const VRegister& vn); 1351 1352 // FP square root. 1353 void fsqrt(const VRegister& vd, const VRegister& vn); 1354 1355 // FP round to integer, nearest with ties to away. 1356 void frinta(const VRegister& vd, const VRegister& vn); 1357 1358 // FP round to integer, implicit rounding. 1359 void frinti(const VRegister& vd, const VRegister& vn); 1360 1361 // FP round to integer, toward minus infinity. 1362 void frintm(const VRegister& vd, const VRegister& vn); 1363 1364 // FP round to integer, nearest with ties to even. 1365 void frintn(const VRegister& vd, const VRegister& vn); 1366 1367 // FP round to integer, toward plus infinity. 1368 void frintp(const VRegister& vd, const VRegister& vn); 1369 1370 // FP round to integer, exact, implicit rounding. 1371 void frintx(const VRegister& vd, const VRegister& vn); 1372 1373 // FP round to integer, towards zero. 1374 void frintz(const VRegister& vd, const VRegister& vn); 1375 1376 void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap); 1377 1378 void FPCompareMacro(const VRegister& vn, 1379 const VRegister& vm, 1380 FPTrapFlags trap); 1381 1382 // FP compare registers. 1383 void fcmp(const VRegister& vn, const VRegister& vm); 1384 1385 // FP compare immediate. 1386 void fcmp(const VRegister& vn, double value); 1387 1388 void FPCCompareMacro(const VRegister& vn, 1389 const VRegister& vm, 1390 StatusFlags nzcv, 1391 Condition cond, 1392 FPTrapFlags trap); 1393 1394 // FP conditional compare. 
  void fccmp(const VRegister& vn,
             const VRegister& vm,
             StatusFlags nzcv,
             Condition cond);

  // FP signaling compare registers.
  void fcmpe(const VRegister& vn, const VRegister& vm);

  // FP signaling compare immediate.
  void fcmpe(const VRegister& vn, double value);

  // FP conditional signaling compare.
  void fccmpe(const VRegister& vn,
              const VRegister& vm,
              StatusFlags nzcv,
              Condition cond);

  // FP conditional select.
  void fcsel(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             Condition cond);

  // Common FP Convert functions.
  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);

  // FP convert between precisions.
  void fcvt(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision.
  void fcvtl(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision (second part).
  void fcvtl2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision.
  void fcvtn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision (second part).
  void fcvtn2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd.
  void fcvtxn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd (second part).
  void fcvtxn2(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
  void fcvtas(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  void fcvtau(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
1450 void fcvtas(const VRegister& vd, const VRegister& vn); 1451 1452 // FP convert to unsigned integer, nearest with ties to away. 1453 void fcvtau(const VRegister& vd, const VRegister& vn); 1454 1455 // FP convert to signed integer, round towards -infinity. 1456 void fcvtms(const Register& rd, const VRegister& vn); 1457 1458 // FP convert to unsigned integer, round towards -infinity. 1459 void fcvtmu(const Register& rd, const VRegister& vn); 1460 1461 // FP convert to signed integer, round towards -infinity. 1462 void fcvtms(const VRegister& vd, const VRegister& vn); 1463 1464 // FP convert to unsigned integer, round towards -infinity. 1465 void fcvtmu(const VRegister& vd, const VRegister& vn); 1466 1467 // FP convert to signed integer, nearest with ties to even. 1468 void fcvtns(const Register& rd, const VRegister& vn); 1469 1470 // FP convert to unsigned integer, nearest with ties to even. 1471 void fcvtnu(const Register& rd, const VRegister& vn); 1472 1473 // FP convert to signed integer, nearest with ties to even. 1474 void fcvtns(const VRegister& rd, const VRegister& vn); 1475 1476 // FP convert to unsigned integer, nearest with ties to even. 1477 void fcvtnu(const VRegister& rd, const VRegister& vn); 1478 1479 // FP convert to signed integer or fixed-point, round towards zero. 1480 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); 1481 1482 // FP convert to unsigned integer or fixed-point, round towards zero. 1483 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); 1484 1485 // FP convert to signed integer or fixed-point, round towards zero. 1486 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); 1487 1488 // FP convert to unsigned integer or fixed-point, round towards zero. 1489 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0); 1490 1491 // FP convert to signed integer, round towards +infinity. 
1492 void fcvtps(const Register& rd, const VRegister& vn); 1493 1494 // FP convert to unsigned integer, round towards +infinity. 1495 void fcvtpu(const Register& rd, const VRegister& vn); 1496 1497 // FP convert to signed integer, round towards +infinity. 1498 void fcvtps(const VRegister& vd, const VRegister& vn); 1499 1500 // FP convert to unsigned integer, round towards +infinity. 1501 void fcvtpu(const VRegister& vd, const VRegister& vn); 1502 1503 // Convert signed integer or fixed point to FP. 1504 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0); 1505 1506 // Convert unsigned integer or fixed point to FP. 1507 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0); 1508 1509 // Convert signed integer or fixed-point to FP. 1510 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); 1511 1512 // Convert unsigned integer or fixed-point to FP. 1513 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); 1514 1515 // Unsigned absolute difference. 1516 void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1517 1518 // Signed absolute difference. 1519 void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1520 1521 // Unsigned absolute difference and accumulate. 1522 void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1523 1524 // Signed absolute difference and accumulate. 1525 void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1526 1527 // Add. 1528 void add(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1529 1530 // Subtract. 1531 void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1532 1533 // Unsigned halving add. 1534 void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1535 1536 // Signed halving add. 1537 void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1538 1539 // Unsigned rounding halving add. 
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving sub.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving sub.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements scalar.
  void addp(const VRegister& vd, const VRegister& vn);

  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-sub by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-sub by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-sub by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-sub by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating doubling long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-sub by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-sub by element (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise and. (Trailing underscore avoids the C++ `and` keyword.)
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate.
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register.
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise orn.
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise eor.
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate.
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Vector move immediate.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not. (Trailing underscore avoids the C++ `not` keyword.)
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);

  // Extract vector from pair of vectors.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element.
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);

  // NEON structure loads (LD1-LD4 and the replicating LD1R-LD4R forms).

  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);

  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
2053 void uxtl(const VRegister& vd, const VRegister& vn); 2054 2055 // Unsigned extend long (second part). 2056 void uxtl2(const VRegister& vd, const VRegister& vn); 2057 2058 // Shift left by immediate and insert. 2059 void sli(const VRegister& vd, const VRegister& vn, int shift); 2060 2061 // Shift right by immediate and insert. 2062 void sri(const VRegister& vd, const VRegister& vn, int shift); 2063 2064 // Signed maximum. 2065 void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2066 2067 // Signed pairwise maximum. 2068 void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2069 2070 // Add across vector. 2071 void addv(const VRegister& vd, const VRegister& vn); 2072 2073 // Signed add long across vector. 2074 void saddlv(const VRegister& vd, const VRegister& vn); 2075 2076 // Unsigned add long across vector. 2077 void uaddlv(const VRegister& vd, const VRegister& vn); 2078 2079 // FP maximum number across vector. 2080 void fmaxnmv(const VRegister& vd, const VRegister& vn); 2081 2082 // FP maximum across vector. 2083 void fmaxv(const VRegister& vd, const VRegister& vn); 2084 2085 // FP minimum number across vector. 2086 void fminnmv(const VRegister& vd, const VRegister& vn); 2087 2088 // FP minimum across vector. 2089 void fminv(const VRegister& vd, const VRegister& vn); 2090 2091 // Signed maximum across vector. 2092 void smaxv(const VRegister& vd, const VRegister& vn); 2093 2094 // Signed minimum. 2095 void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2096 2097 // Signed minimum pairwise. 2098 void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2099 2100 // Signed minimum across vector. 2101 void sminv(const VRegister& vd, const VRegister& vn); 2102 2103 // One-element structure store from one register. 2104 void st1(const VRegister& vt, const MemOperand& src); 2105 2106 // One-element structure store from two registers. 
2107 void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 2108 2109 // One-element structure store from three registers. 2110 void st1(const VRegister& vt, 2111 const VRegister& vt2, 2112 const VRegister& vt3, 2113 const MemOperand& src); 2114 2115 // One-element structure store from four registers. 2116 void st1(const VRegister& vt, 2117 const VRegister& vt2, 2118 const VRegister& vt3, 2119 const VRegister& vt4, 2120 const MemOperand& src); 2121 2122 // One-element single structure store from one lane. 2123 void st1(const VRegister& vt, int lane, const MemOperand& src); 2124 2125 // Two-element structure store from two registers. 2126 void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 2127 2128 // Two-element single structure store from two lanes. 2129 void st2(const VRegister& vt, 2130 const VRegister& vt2, 2131 int lane, 2132 const MemOperand& src); 2133 2134 // Three-element structure store from three registers. 2135 void st3(const VRegister& vt, 2136 const VRegister& vt2, 2137 const VRegister& vt3, 2138 const MemOperand& src); 2139 2140 // Three-element single structure store from three lanes. 2141 void st3(const VRegister& vt, 2142 const VRegister& vt2, 2143 const VRegister& vt3, 2144 int lane, 2145 const MemOperand& src); 2146 2147 // Four-element structure store from four registers. 2148 void st4(const VRegister& vt, 2149 const VRegister& vt2, 2150 const VRegister& vt3, 2151 const VRegister& vt4, 2152 const MemOperand& src); 2153 2154 // Four-element single structure store from four lanes. 2155 void st4(const VRegister& vt, 2156 const VRegister& vt2, 2157 const VRegister& vt3, 2158 const VRegister& vt4, 2159 int lane, 2160 const MemOperand& src); 2161 2162 // Unsigned add long. 2163 void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2164 2165 // Unsigned add long (second part). 
2166 void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2167 2168 // Unsigned add wide. 2169 void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2170 2171 // Unsigned add wide (second part). 2172 void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2173 2174 // Signed add long. 2175 void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2176 2177 // Signed add long (second part). 2178 void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2179 2180 // Signed add wide. 2181 void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2182 2183 // Signed add wide (second part). 2184 void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2185 2186 // Unsigned subtract long. 2187 void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2188 2189 // Unsigned subtract long (second part). 2190 void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2191 2192 // Unsigned subtract wide. 2193 void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2194 2195 // Unsigned subtract wide (second part). 2196 void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2197 2198 // Signed subtract long. 2199 void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2200 2201 // Signed subtract long (second part). 2202 void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2203 2204 // Signed integer subtract wide. 2205 void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2206 2207 // Signed integer subtract wide (second part). 2208 void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2209 2210 // Unsigned maximum. 2211 void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2212 2213 // Unsigned pairwise maximum. 
2214 void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2215 2216 // Unsigned maximum across vector. 2217 void umaxv(const VRegister& vd, const VRegister& vn); 2218 2219 // Unsigned minimum. 2220 void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2221 2222 // Unsigned pairwise minimum. 2223 void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2224 2225 // Unsigned minimum across vector. 2226 void uminv(const VRegister& vd, const VRegister& vn); 2227 2228 // Transpose vectors (primary). 2229 void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2230 2231 // Transpose vectors (secondary). 2232 void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2233 2234 // Unzip vectors (primary). 2235 void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2236 2237 // Unzip vectors (secondary). 2238 void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2239 2240 // Zip vectors (primary). 2241 void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2242 2243 // Zip vectors (secondary). 2244 void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2245 2246 // Signed shift right by immediate. 2247 void sshr(const VRegister& vd, const VRegister& vn, int shift); 2248 2249 // Unsigned shift right by immediate. 2250 void ushr(const VRegister& vd, const VRegister& vn, int shift); 2251 2252 // Signed rounding shift right by immediate. 2253 void srshr(const VRegister& vd, const VRegister& vn, int shift); 2254 2255 // Unsigned rounding shift right by immediate. 2256 void urshr(const VRegister& vd, const VRegister& vn, int shift); 2257 2258 // Signed shift right by immediate and accumulate. 2259 void ssra(const VRegister& vd, const VRegister& vn, int shift); 2260 2261 // Unsigned shift right by immediate and accumulate. 
2262 void usra(const VRegister& vd, const VRegister& vn, int shift); 2263 2264 // Signed rounding shift right by immediate and accumulate. 2265 void srsra(const VRegister& vd, const VRegister& vn, int shift); 2266 2267 // Unsigned rounding shift right by immediate and accumulate. 2268 void ursra(const VRegister& vd, const VRegister& vn, int shift); 2269 2270 // Shift right narrow by immediate. 2271 void shrn(const VRegister& vd, const VRegister& vn, int shift); 2272 2273 // Shift right narrow by immediate (second part). 2274 void shrn2(const VRegister& vd, const VRegister& vn, int shift); 2275 2276 // Rounding shift right narrow by immediate. 2277 void rshrn(const VRegister& vd, const VRegister& vn, int shift); 2278 2279 // Rounding shift right narrow by immediate (second part). 2280 void rshrn2(const VRegister& vd, const VRegister& vn, int shift); 2281 2282 // Unsigned saturating shift right narrow by immediate. 2283 void uqshrn(const VRegister& vd, const VRegister& vn, int shift); 2284 2285 // Unsigned saturating shift right narrow by immediate (second part). 2286 void uqshrn2(const VRegister& vd, const VRegister& vn, int shift); 2287 2288 // Unsigned saturating rounding shift right narrow by immediate. 2289 void uqrshrn(const VRegister& vd, const VRegister& vn, int shift); 2290 2291 // Unsigned saturating rounding shift right narrow by immediate (second part). 2292 void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift); 2293 2294 // Signed saturating shift right narrow by immediate. 2295 void sqshrn(const VRegister& vd, const VRegister& vn, int shift); 2296 2297 // Signed saturating shift right narrow by immediate (second part). 2298 void sqshrn2(const VRegister& vd, const VRegister& vn, int shift); 2299 2300 // Signed saturating rounded shift right narrow by immediate. 2301 void sqrshrn(const VRegister& vd, const VRegister& vn, int shift); 2302 2303 // Signed saturating rounded shift right narrow by immediate (second part). 
2304 void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift); 2305 2306 // Signed saturating shift right unsigned narrow by immediate. 2307 void sqshrun(const VRegister& vd, const VRegister& vn, int shift); 2308 2309 // Signed saturating shift right unsigned narrow by immediate (second part). 2310 void sqshrun2(const VRegister& vd, const VRegister& vn, int shift); 2311 2312 // Signed sat rounded shift right unsigned narrow by immediate. 2313 void sqrshrun(const VRegister& vd, const VRegister& vn, int shift); 2314 2315 // Signed sat rounded shift right unsigned narrow by immediate (second part). 2316 void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift); 2317 2318 // FP reciprocal step. 2319 void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2320 2321 // FP reciprocal estimate. 2322 void frecpe(const VRegister& vd, const VRegister& vn); 2323 2324 // FP reciprocal square root estimate. 2325 void frsqrte(const VRegister& vd, const VRegister& vn); 2326 2327 // FP reciprocal square root step. 2328 void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2329 2330 // Signed absolute difference and accumulate long. 2331 void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2332 2333 // Signed absolute difference and accumulate long (second part). 2334 void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2335 2336 // Unsigned absolute difference and accumulate long. 2337 void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2338 2339 // Unsigned absolute difference and accumulate long (second part). 2340 void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2341 2342 // Signed absolute difference long. 2343 void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2344 2345 // Signed absolute difference long (second part). 
2346 void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2347 2348 // Unsigned absolute difference long. 2349 void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2350 2351 // Unsigned absolute difference long (second part). 2352 void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2353 2354 // Polynomial multiply long. 2355 void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2356 2357 // Polynomial multiply long (second part). 2358 void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2359 2360 // Signed long multiply-add. 2361 void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2362 2363 // Signed long multiply-add (second part). 2364 void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2365 2366 // Unsigned long multiply-add. 2367 void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2368 2369 // Unsigned long multiply-add (second part). 2370 void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2371 2372 // Signed long multiply-sub. 2373 void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2374 2375 // Signed long multiply-sub (second part). 2376 void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2377 2378 // Unsigned long multiply-sub. 2379 void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2380 2381 // Unsigned long multiply-sub (second part). 2382 void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2383 2384 // Signed long multiply. 2385 void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2386 2387 // Signed long multiply (second part). 2388 void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2389 2390 // Signed saturating doubling long multiply-add. 
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply element returning high half.
  void sqdmulh(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating rounding doubling multiply element returning high half.
  void sqrdmulh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Unsigned long multiply.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half.
  void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half (second part).
2436 void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2437 2438 // Rounding add narrow returning high half. 2439 void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2440 2441 // Rounding add narrow returning high half (second part). 2442 void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2443 2444 // Subtract narrow returning high half. 2445 void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2446 2447 // Subtract narrow returning high half (second part). 2448 void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2449 2450 // Rounding subtract narrow returning high half. 2451 void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2452 2453 // Rounding subtract narrow returning high half (second part). 2454 void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2455 2456 // FP vector multiply accumulate. 2457 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2458 2459 // FP vector multiply subtract. 2460 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2461 2462 // FP vector multiply extended. 2463 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2464 2465 // FP absolute greater than or equal. 2466 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2467 2468 // FP absolute greater than. 2469 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2470 2471 // FP multiply by element. 2472 void fmul(const VRegister& vd, 2473 const VRegister& vn, 2474 const VRegister& vm, 2475 int vm_index); 2476 2477 // FP fused multiply-add to accumulator by element. 2478 void fmla(const VRegister& vd, 2479 const VRegister& vn, 2480 const VRegister& vm, 2481 int vm_index); 2482 2483 // FP fused multiply-sub from accumulator by element. 
  void fmls(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP multiply extended by element.
  void fmulx(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP compare equal.
  void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than.
  void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than or equal.
  void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP compare equal to zero.
  void fcmeq(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than zero.
  void fcmgt(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than or equal to zero.
  void fcmge(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than or equal to zero.
  void fcmle(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than zero.
  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);

  // FP absolute difference.
  void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add vector.
  void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add scalar.
  void faddp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum vector.
  void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum scalar.
  void fmaxp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum vector.
  void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum scalar.
  void fminp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum number vector.
  void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum number scalar.
  void fmaxnmp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum number vector.
  void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum number scalar.
  void fminnmp(const VRegister& vd, const VRegister& vn);

  // Emit generic instructions.
  // Emit raw instructions into the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) { dc(data); }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) { dc(data); }

  // Emit data in the instruction stream.
  // The data is written verbatim into the buffer; it is only valid to call
  // this while the assembler accepts emission (see the AllowAssembler()
  // assertion below).
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }

  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char* string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(AllowAssembler());

    GetBuffer()->EmitString(string);
    GetBuffer()->Align();
  }

  // Code generation helpers.

  // Register encoding.
  // These encoders produce the register field for each operand position.
  // They disallow the stack pointer (its internal code is rejected by the
  // assertions); use RdSP/RnSP below when sp is a valid operand.
  static Instr Rd(CPURegister rd) {
    VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode);
    return rd.GetCode() << Rd_offset;
  }

  static Instr Rn(CPURegister rn) {
    VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode);
    return rn.GetCode() << Rn_offset;
  }

  static Instr Rm(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    return rm.GetCode() << Rm_offset;
  }

  // As Rm, but additionally rejects the zero register.
  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }

  static Instr Ra(CPURegister ra) {
    VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode);
    return ra.GetCode() << Ra_offset;
  }

  static Instr Rt(CPURegister rt) {
    VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode);
    return rt.GetCode() << Rt_offset;
  }

  static Instr Rt2(CPURegister rt2) {
    VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode);
    return rt2.GetCode() << Rt2_offset;
  }

  static Instr Rs(CPURegister rs) {
    VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode);
    return rs.GetCode() << Rs_offset;
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  static Instr RdSP(Register rd) {
    VIXL_ASSERT(!rd.IsZero());
    return (rd.GetCode() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    VIXL_ASSERT(!rn.IsZero());
    return (rn.GetCode() & kRegCodeMask) << Rn_offset;
  }

  // Flags encoding.
  static Instr Flags(FlagsUpdate S) {
    if (S == SetFlags) {
      return 1 << FlagsUpdate_offset;
    } else if (S == LeaveFlags) {
      return 0 << FlagsUpdate_offset;
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  static Instr Cond(Condition cond) { return cond << Condition_offset; }

  // PC-relative address encoding.
2650 static Instr ImmPCRelAddress(int64_t imm21) { 2651 VIXL_ASSERT(IsInt21(imm21)); 2652 Instr imm = static_cast<Instr>(TruncateToUint21(imm21)); 2653 Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset; 2654 Instr immlo = imm << ImmPCRelLo_offset; 2655 return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask); 2656 } 2657 2658 // Branch encoding. 2659 static Instr ImmUncondBranch(int64_t imm26) { 2660 VIXL_ASSERT(IsInt26(imm26)); 2661 return TruncateToUint26(imm26) << ImmUncondBranch_offset; 2662 } 2663 2664 static Instr ImmCondBranch(int64_t imm19) { 2665 VIXL_ASSERT(IsInt19(imm19)); 2666 return TruncateToUint19(imm19) << ImmCondBranch_offset; 2667 } 2668 2669 static Instr ImmCmpBranch(int64_t imm19) { 2670 VIXL_ASSERT(IsInt19(imm19)); 2671 return TruncateToUint19(imm19) << ImmCmpBranch_offset; 2672 } 2673 2674 static Instr ImmTestBranch(int64_t imm14) { 2675 VIXL_ASSERT(IsInt14(imm14)); 2676 return TruncateToUint14(imm14) << ImmTestBranch_offset; 2677 } 2678 2679 static Instr ImmTestBranchBit(unsigned bit_pos) { 2680 VIXL_ASSERT(IsUint6(bit_pos)); 2681 // Subtract five from the shift offset, as we need bit 5 from bit_pos. 2682 unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5); 2683 unsigned b40 = bit_pos << ImmTestBranchBit40_offset; 2684 b5 &= ImmTestBranchBit5_mask; 2685 b40 &= ImmTestBranchBit40_mask; 2686 return b5 | b40; 2687 } 2688 2689 // Data Processing encoding. 2690 static Instr SF(Register rd) { 2691 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits; 2692 } 2693 2694 static Instr ImmAddSub(int imm) { 2695 VIXL_ASSERT(IsImmAddSub(imm)); 2696 if (IsUint12(imm)) { // No shift required. 
2697 imm <<= ImmAddSub_offset; 2698 } else { 2699 imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset); 2700 } 2701 return imm; 2702 } 2703 2704 static Instr ImmS(unsigned imms, unsigned reg_size) { 2705 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) || 2706 ((reg_size == kWRegSize) && IsUint5(imms))); 2707 USE(reg_size); 2708 return imms << ImmS_offset; 2709 } 2710 2711 static Instr ImmR(unsigned immr, unsigned reg_size) { 2712 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) || 2713 ((reg_size == kWRegSize) && IsUint5(immr))); 2714 USE(reg_size); 2715 VIXL_ASSERT(IsUint6(immr)); 2716 return immr << ImmR_offset; 2717 } 2718 2719 static Instr ImmSetBits(unsigned imms, unsigned reg_size) { 2720 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 2721 VIXL_ASSERT(IsUint6(imms)); 2722 VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3)); 2723 USE(reg_size); 2724 return imms << ImmSetBits_offset; 2725 } 2726 2727 static Instr ImmRotate(unsigned immr, unsigned reg_size) { 2728 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 2729 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) || 2730 ((reg_size == kWRegSize) && IsUint5(immr))); 2731 USE(reg_size); 2732 return immr << ImmRotate_offset; 2733 } 2734 2735 static Instr ImmLLiteral(int64_t imm19) { 2736 VIXL_ASSERT(IsInt19(imm19)); 2737 return TruncateToUint19(imm19) << ImmLLiteral_offset; 2738 } 2739 2740 static Instr BitN(unsigned bitn, unsigned reg_size) { 2741 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 2742 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0)); 2743 USE(reg_size); 2744 return bitn << BitN_offset; 2745 } 2746 2747 static Instr ShiftDP(Shift shift) { 2748 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR); 2749 return shift << ShiftDP_offset; 2750 } 2751 2752 static Instr ImmDPShift(unsigned amount) { 2753 VIXL_ASSERT(IsUint6(amount)); 2754 return amount << ImmDPShift_offset; 2755 } 2756 2757 
static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; } 2758 2759 static Instr ImmExtendShift(unsigned left_shift) { 2760 VIXL_ASSERT(left_shift <= 4); 2761 return left_shift << ImmExtendShift_offset; 2762 } 2763 2764 static Instr ImmCondCmp(unsigned imm) { 2765 VIXL_ASSERT(IsUint5(imm)); 2766 return imm << ImmCondCmp_offset; 2767 } 2768 2769 static Instr Nzcv(StatusFlags nzcv) { 2770 return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset; 2771 } 2772 2773 // MemOperand offset encoding. 2774 static Instr ImmLSUnsigned(int64_t imm12) { 2775 VIXL_ASSERT(IsUint12(imm12)); 2776 return TruncateToUint12(imm12) << ImmLSUnsigned_offset; 2777 } 2778 2779 static Instr ImmLS(int64_t imm9) { 2780 VIXL_ASSERT(IsInt9(imm9)); 2781 return TruncateToUint9(imm9) << ImmLS_offset; 2782 } 2783 2784 static Instr ImmLSPair(int64_t imm7, unsigned access_size) { 2785 VIXL_ASSERT(IsMultiple(imm7, 1 << access_size)); 2786 int64_t scaled_imm7 = imm7 / (1 << access_size); 2787 VIXL_ASSERT(IsInt7(scaled_imm7)); 2788 return TruncateToUint7(scaled_imm7) << ImmLSPair_offset; 2789 } 2790 2791 static Instr ImmShiftLS(unsigned shift_amount) { 2792 VIXL_ASSERT(IsUint1(shift_amount)); 2793 return shift_amount << ImmShiftLS_offset; 2794 } 2795 2796 static Instr ImmPrefetchOperation(int imm5) { 2797 VIXL_ASSERT(IsUint5(imm5)); 2798 return imm5 << ImmPrefetchOperation_offset; 2799 } 2800 2801 static Instr ImmException(int imm16) { 2802 VIXL_ASSERT(IsUint16(imm16)); 2803 return imm16 << ImmException_offset; 2804 } 2805 2806 static Instr ImmSystemRegister(int imm15) { 2807 VIXL_ASSERT(IsUint15(imm15)); 2808 return imm15 << ImmSystemRegister_offset; 2809 } 2810 2811 static Instr ImmHint(int imm7) { 2812 VIXL_ASSERT(IsUint7(imm7)); 2813 return imm7 << ImmHint_offset; 2814 } 2815 2816 static Instr CRm(int imm4) { 2817 VIXL_ASSERT(IsUint4(imm4)); 2818 return imm4 << CRm_offset; 2819 } 2820 2821 static Instr CRn(int imm4) { 2822 VIXL_ASSERT(IsUint4(imm4)); 2823 return imm4 << CRn_offset; 
2824 } 2825 2826 static Instr SysOp(int imm14) { 2827 VIXL_ASSERT(IsUint14(imm14)); 2828 return imm14 << SysOp_offset; 2829 } 2830 2831 static Instr ImmSysOp1(int imm3) { 2832 VIXL_ASSERT(IsUint3(imm3)); 2833 return imm3 << SysOp1_offset; 2834 } 2835 2836 static Instr ImmSysOp2(int imm3) { 2837 VIXL_ASSERT(IsUint3(imm3)); 2838 return imm3 << SysOp2_offset; 2839 } 2840 2841 static Instr ImmBarrierDomain(int imm2) { 2842 VIXL_ASSERT(IsUint2(imm2)); 2843 return imm2 << ImmBarrierDomain_offset; 2844 } 2845 2846 static Instr ImmBarrierType(int imm2) { 2847 VIXL_ASSERT(IsUint2(imm2)); 2848 return imm2 << ImmBarrierType_offset; 2849 } 2850 2851 // Move immediates encoding. 2852 static Instr ImmMoveWide(uint64_t imm) { 2853 VIXL_ASSERT(IsUint16(imm)); 2854 return static_cast<Instr>(imm << ImmMoveWide_offset); 2855 } 2856 2857 static Instr ShiftMoveWide(int64_t shift) { 2858 VIXL_ASSERT(IsUint2(shift)); 2859 return static_cast<Instr>(shift << ShiftMoveWide_offset); 2860 } 2861 2862 // FP Immediates. 2863 static Instr ImmFP32(float imm); 2864 static Instr ImmFP64(double imm); 2865 2866 // FP register type. 2867 static Instr FPType(FPRegister fd) { return fd.Is64Bits() ? FP64 : FP32; } 2868 2869 static Instr FPScale(unsigned scale) { 2870 VIXL_ASSERT(IsUint6(scale)); 2871 return scale << FPScale_offset; 2872 } 2873 2874 // Immediate field checking helpers. 
2875 static bool IsImmAddSub(int64_t immediate); 2876 static bool IsImmConditionalCompare(int64_t immediate); 2877 static bool IsImmFP32(float imm); 2878 static bool IsImmFP64(double imm); 2879 static bool IsImmLogical(uint64_t value, 2880 unsigned width, 2881 unsigned* n = NULL, 2882 unsigned* imm_s = NULL, 2883 unsigned* imm_r = NULL); 2884 static bool IsImmLSPair(int64_t offset, unsigned access_size); 2885 static bool IsImmLSScaled(int64_t offset, unsigned access_size); 2886 static bool IsImmLSUnscaled(int64_t offset); 2887 static bool IsImmMovn(uint64_t imm, unsigned reg_size); 2888 static bool IsImmMovz(uint64_t imm, unsigned reg_size); 2889 2890 // Instruction bits for vector format in data processing operations. 2891 static Instr VFormat(VRegister vd) { 2892 if (vd.Is64Bits()) { 2893 switch (vd.GetLanes()) { 2894 case 2: 2895 return NEON_2S; 2896 case 4: 2897 return NEON_4H; 2898 case 8: 2899 return NEON_8B; 2900 default: 2901 return 0xffffffff; 2902 } 2903 } else { 2904 VIXL_ASSERT(vd.Is128Bits()); 2905 switch (vd.GetLanes()) { 2906 case 2: 2907 return NEON_2D; 2908 case 4: 2909 return NEON_4S; 2910 case 8: 2911 return NEON_8H; 2912 case 16: 2913 return NEON_16B; 2914 default: 2915 return 0xffffffff; 2916 } 2917 } 2918 } 2919 2920 // Instruction bits for vector format in floating point data processing 2921 // operations. 2922 static Instr FPFormat(VRegister vd) { 2923 if (vd.GetLanes() == 1) { 2924 // Floating point scalar formats. 2925 VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits()); 2926 return vd.Is64Bits() ? FP64 : FP32; 2927 } 2928 2929 // Two lane floating point vector formats. 2930 if (vd.GetLanes() == 2) { 2931 VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits()); 2932 return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S; 2933 } 2934 2935 // Four lane floating point vector format. 2936 VIXL_ASSERT((vd.GetLanes() == 4) && vd.Is128Bits()); 2937 return NEON_FP_4S; 2938 } 2939 2940 // Instruction bits for vector format in load and store operations. 
2941 static Instr LSVFormat(VRegister vd) { 2942 if (vd.Is64Bits()) { 2943 switch (vd.GetLanes()) { 2944 case 1: 2945 return LS_NEON_1D; 2946 case 2: 2947 return LS_NEON_2S; 2948 case 4: 2949 return LS_NEON_4H; 2950 case 8: 2951 return LS_NEON_8B; 2952 default: 2953 return 0xffffffff; 2954 } 2955 } else { 2956 VIXL_ASSERT(vd.Is128Bits()); 2957 switch (vd.GetLanes()) { 2958 case 2: 2959 return LS_NEON_2D; 2960 case 4: 2961 return LS_NEON_4S; 2962 case 8: 2963 return LS_NEON_8H; 2964 case 16: 2965 return LS_NEON_16B; 2966 default: 2967 return 0xffffffff; 2968 } 2969 } 2970 } 2971 2972 // Instruction bits for scalar format in data processing operations. 2973 static Instr SFormat(VRegister vd) { 2974 VIXL_ASSERT(vd.GetLanes() == 1); 2975 switch (vd.GetSizeInBytes()) { 2976 case 1: 2977 return NEON_B; 2978 case 2: 2979 return NEON_H; 2980 case 4: 2981 return NEON_S; 2982 case 8: 2983 return NEON_D; 2984 default: 2985 return 0xffffffff; 2986 } 2987 } 2988 2989 static Instr ImmNEONHLM(int index, int num_bits) { 2990 int h, l, m; 2991 if (num_bits == 3) { 2992 VIXL_ASSERT(IsUint3(index)); 2993 h = (index >> 2) & 1; 2994 l = (index >> 1) & 1; 2995 m = (index >> 0) & 1; 2996 } else if (num_bits == 2) { 2997 VIXL_ASSERT(IsUint2(index)); 2998 h = (index >> 1) & 1; 2999 l = (index >> 0) & 1; 3000 m = 0; 3001 } else { 3002 VIXL_ASSERT(IsUint1(index) && (num_bits == 1)); 3003 h = (index >> 0) & 1; 3004 l = 0; 3005 m = 0; 3006 } 3007 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); 3008 } 3009 3010 static Instr ImmNEONExt(int imm4) { 3011 VIXL_ASSERT(IsUint4(imm4)); 3012 return imm4 << ImmNEONExt_offset; 3013 } 3014 3015 static Instr ImmNEON5(Instr format, int index) { 3016 VIXL_ASSERT(IsUint4(index)); 3017 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 3018 int imm5 = (index << (s + 1)) | (1 << s); 3019 return imm5 << ImmNEON5_offset; 3020 } 3021 3022 static Instr ImmNEON4(Instr format, int index) { 3023 
VIXL_ASSERT(IsUint4(index)); 3024 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 3025 int imm4 = index << s; 3026 return imm4 << ImmNEON4_offset; 3027 } 3028 3029 static Instr ImmNEONabcdefgh(int imm8) { 3030 VIXL_ASSERT(IsUint8(imm8)); 3031 Instr instr; 3032 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; 3033 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; 3034 return instr; 3035 } 3036 3037 static Instr NEONCmode(int cmode) { 3038 VIXL_ASSERT(IsUint4(cmode)); 3039 return cmode << NEONCmode_offset; 3040 } 3041 3042 static Instr NEONModImmOp(int op) { 3043 VIXL_ASSERT(IsUint1(op)); 3044 return op << NEONModImmOp_offset; 3045 } 3046 3047 // Size of the code generated since label to the current position. 3048 size_t GetSizeOfCodeGeneratedSince(Label* label) const { 3049 VIXL_ASSERT(label->IsBound()); 3050 return GetBuffer().GetOffsetFrom(label->GetLocation()); 3051 } 3052 VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince", 3053 size_t SizeOfCodeGeneratedSince(Label* label) const) { 3054 return GetSizeOfCodeGeneratedSince(label); 3055 } 3056 3057 VIXL_DEPRECATED("GetBuffer().GetCapacity()", 3058 size_t GetBufferCapacity() const) { 3059 return GetBuffer().GetCapacity(); 3060 } 3061 VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) { 3062 return GetBuffer().GetCapacity(); 3063 } 3064 3065 VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()", 3066 size_t GetRemainingBufferSpace() const) { 3067 return GetBuffer().GetRemainingBytes(); 3068 } 3069 VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()", 3070 size_t RemainingBufferSpace() const) { 3071 return GetBuffer().GetRemainingBytes(); 3072 } 3073 3074 PositionIndependentCodeOption GetPic() const { return pic_; } 3075 VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) { 3076 return GetPic(); 3077 } 3078 3079 bool AllowPageOffsetDependentCode() const { 3080 return (GetPic() == PageOffsetDependentCode) || 3081 (GetPic() == PositionDependentCode); 3082 } 3083 3084 
  // Return the zero register (xzr or wzr) matching the size of reg.
  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }

 protected:
  // Shared load/store emission helpers; bodies are not in this header.
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  // NEON structure load/store helpers (multi-structure, single-structure,
  // all-lanes variants).
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // Shared helpers for logical operations (AND, ORR, EOR, ...).
  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  // Shared helper for conditional compare operations (CCMP, CCMN).
  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  // Shared helper for add/subtract-with-carry operations (ADC, SBC, ...).
  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);


  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Shared helper for add/subtract operations (ADD, SUB, CMP, ...).
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // Shared helper for NEON table lookup operations (TBL, TBX).
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);


 private:
  // Convert an FP value to its 8-bit instruction-immediate encoding.
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  // Shared helper for wide-immediate moves (MOVZ, MOVN, MOVK).
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  // Data-processing helpers for shifted- and extended-register operand forms.
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON emission helpers, grouped by instruction class.
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  // Addressing-mode field for NEON structure loads and stores.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);

  // Emit the instruction in buffer_. All instructions are 32 bits wide.
  void Emit(Instr instruction) {
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }

  // Position-independent-code option; exposed through GetPic().
  PositionIndependentCodeOption pic_;
};


// Update a literal's value from outside the assembler; forwards to the
// overload taking the buffer's start address.
template <typename T>
void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
  return UpdateValue(new_value,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


// As above, for two-word (128-bit) literals.
template <typename T>
void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
  return UpdateValue(high64,
                     low64,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


}  // namespace aarch64

// Required InvalSet template specialisations.
3365 // TODO: These template specialisations should not live in this file. Move 3366 // Label out of the aarch64 namespace in order to share its implementation 3367 // later. 3368 #define INVAL_SET_TEMPLATE_PARAMETERS \ 3369 ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t, \ 3370 aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \ 3371 aarch64::Label::kReclaimFactor 3372 template <> 3373 inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey( 3374 const ptrdiff_t& element) { 3375 return element; 3376 } 3377 template <> 3378 inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element, 3379 ptrdiff_t key) { 3380 *element = key; 3381 } 3382 #undef INVAL_SET_TEMPLATE_PARAMETERS 3383 3384 } // namespace vixl 3385 3386 #endif // VIXL_AARCH64_ASSEMBLER_AARCH64_H_ 3387