// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cctype>

#include "macro-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {


void Pool::Release() {
  if (--monitor_ == 0) {
    // Ensure the pool has not been blocked for too long.
    VIXL_ASSERT(masm_->GetCursorOffset() < checkpoint_);
  }
}


void Pool::SetNextCheckpoint(ptrdiff_t checkpoint) {
  masm_->checkpoint_ = std::min(masm_->checkpoint_, checkpoint);
  checkpoint_ = checkpoint;
}


LiteralPool::LiteralPool(MacroAssembler* masm)
    : Pool(masm),
      size_(0),
      first_use_(-1),
      recommended_checkpoint_(kNoCheckpointRequired) {}


LiteralPool::~LiteralPool() {
  VIXL_ASSERT(IsEmpty());
  VIXL_ASSERT(!IsBlocked());
  for (std::vector<RawLiteral*>::iterator it = deleted_on_destruction_.begin();
       it != deleted_on_destruction_.end();
       it++) {
    delete *it;
  }
}


void LiteralPool::Reset() {
  std::vector<RawLiteral*>::iterator it, end;
  for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
    RawLiteral* literal = *it;
    if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) {
      delete literal;
    }
  }
  entries_.clear();
  size_ = 0;
  first_use_ = -1;
  Pool::Reset();
  recommended_checkpoint_ = kNoCheckpointRequired;
}


void LiteralPool::CheckEmitFor(size_t amount, EmitOption option) {
  if (IsEmpty() || IsBlocked()) return;

  ptrdiff_t distance = masm_->GetCursorOffset() + amount - first_use_;
  if (distance >= kRecommendedLiteralPoolRange) {
    Emit(option);
  }
}
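
// For example, if the first literal in the pool was used at buffer offset
// 0x100, the pool becomes due for emission once the cursor, plus the amount
// about to be emitted, reaches 0x100 + kRecommendedLiteralPoolRange.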


// We use a subclass to access the protected `ExactAssemblyScope` constructor,
// which gives us control over the pools. This allows us to use this scope
// within code that emits pools, without creating a circular dependency.
// We keep the constructor private to restrict usage of this helper class.
class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
 private:
  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm, size_t size)
      : ExactAssemblyScope(masm,
                           size,
                           ExactAssemblyScope::kExactSize,
                           ExactAssemblyScope::kIgnorePools) {}

  friend void LiteralPool::Emit(LiteralPool::EmitOption);
  friend void VeneerPool::Emit(VeneerPool::EmitOption, size_t);
};


void LiteralPool::Emit(EmitOption option) {
  // There is an issue if we are asked to emit a blocked or empty pool.
  VIXL_ASSERT(!IsBlocked());
  VIXL_ASSERT(!IsEmpty());

  size_t pool_size = GetSize();
  size_t emit_size = pool_size;
  if (option == kBranchRequired) emit_size += kInstructionSize;
  Label end_of_pool;

  VIXL_ASSERT(emit_size % kInstructionSize == 0);
  {
    CodeBufferCheckScope guard(masm_,
                               emit_size,
                               CodeBufferCheckScope::kCheck,
                               CodeBufferCheckScope::kExactSize);
#ifdef VIXL_DEBUG
    // Also explicitly disallow usage of the `MacroAssembler` here.
    masm_->SetAllowMacroInstructions(false);
#endif
    if (option == kBranchRequired) {
      ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
      masm_->b(&end_of_pool);
    }

    {
      // Marker indicating the size of the literal pool in 32-bit words.
      VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0);
      ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
      masm_->ldr(xzr, static_cast<int>(pool_size / kWRegSizeInBytes));
    }

    // Now populate the literal pool.
    std::vector<RawLiteral*>::iterator it, end;
    for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
      VIXL_ASSERT((*it)->IsUsed());
      masm_->place(*it);
    }

    if (option == kBranchRequired) masm_->bind(&end_of_pool);
#ifdef VIXL_DEBUG
    masm_->SetAllowMacroInstructions(true);
#endif
  }

  Reset();
}
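
// For reference, a pool emitted with `kBranchRequired` has this layout (a
// sketch, derived from the code above; the `ldr xzr, ...` marker encodes the
// pool size in 32-bit words):
//
//       b end_of_pool
//       ldr xzr, #<pool size in 32-bit words>
//       <literal 0>
//       ...
//       <literal N>
//     end_of_pool: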


void LiteralPool::AddEntry(RawLiteral* literal) {
  // A literal must be registered immediately before its first use. Here we
  // cannot control that it is its first use, but we check that no code has
  // been emitted since its last use.
  VIXL_ASSERT(masm_->GetCursorOffset() == literal->GetLastUse());

  UpdateFirstUse(masm_->GetCursorOffset());
  VIXL_ASSERT(masm_->GetCursorOffset() >= first_use_);
  entries_.push_back(literal);
  size_ += literal->GetSize();
}


void LiteralPool::UpdateFirstUse(ptrdiff_t use_position) {
  first_use_ = std::min(first_use_, use_position);
  if (first_use_ == -1) {
    first_use_ = use_position;
    SetNextRecommendedCheckpoint(GetNextRecommendedCheckpoint());
    SetNextCheckpoint(first_use_ + Instruction::kLoadLiteralRange);
  } else {
    VIXL_ASSERT(use_position > first_use_);
  }
}
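
// Note on the checkpoint above: a literal load (`ldr` with a literal operand)
// can only reach a limited distance ahead (Instruction::kLoadLiteralRange, on
// the order of 1MB), so the pool must be placed before
// `first_use_ + kLoadLiteralRange`. For example, a literal first used at
// offset 0x100 sets the checkpoint to 0x100 + kLoadLiteralRange.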


void VeneerPool::Reset() {
  Pool::Reset();
  unresolved_branches_.Reset();
}


void VeneerPool::Release() {
  if (--monitor_ == 0) {
    VIXL_ASSERT(IsEmpty() ||
                masm_->GetCursorOffset() <
                    unresolved_branches_.GetFirstLimit());
  }
}


void VeneerPool::RegisterUnresolvedBranch(ptrdiff_t branch_pos,
                                          Label* label,
                                          ImmBranchType branch_type) {
  VIXL_ASSERT(!label->IsBound());
  BranchInfo branch_info = BranchInfo(branch_pos, label, branch_type);
  unresolved_branches_.insert(branch_info);
  UpdateNextCheckPoint();
  // TODO: In debug mode, register the label with the assembler to make sure
  // it is bound with `MacroAssembler::Bind` and not `Assembler::bind`.
}


void VeneerPool::DeleteUnresolvedBranchInfoForLabel(Label* label) {
  if (IsEmpty()) {
    VIXL_ASSERT(checkpoint_ == kNoCheckpointRequired);
    return;
  }

  if (label->IsLinked()) {
    Label::LabelLinksIterator links_it(label);
    for (; !links_it.Done(); links_it.Advance()) {
      ptrdiff_t link_offset = *links_it.Current();
      Instruction* link = masm_->GetInstructionAt(link_offset);

      // ADR instructions are not handled.
      if (BranchTypeUsesVeneers(link->GetBranchType())) {
        BranchInfo branch_info(link_offset, label, link->GetBranchType());
        unresolved_branches_.erase(branch_info);
      }
    }
  }

  UpdateNextCheckPoint();
}


bool VeneerPool::ShouldEmitVeneer(int64_t max_reachable_pc, size_t amount) {
  ptrdiff_t offset =
      kPoolNonVeneerCodeSize + amount + GetMaxSize() + GetOtherPoolsMaxSize();
  return (masm_->GetCursorOffset() + offset) > max_reachable_pc;
}


void VeneerPool::CheckEmitFor(size_t amount, EmitOption option) {
  if (IsEmpty()) return;

  VIXL_ASSERT(masm_->GetCursorOffset() < unresolved_branches_.GetFirstLimit());

  if (IsBlocked()) return;

  if (ShouldEmitVeneers(amount)) {
    Emit(option, amount);
  } else {
    UpdateNextCheckPoint();
  }
}


void VeneerPool::Emit(EmitOption option, size_t amount) {
  // There is an issue if we are asked to emit a blocked or empty pool.
  VIXL_ASSERT(!IsBlocked());
  VIXL_ASSERT(!IsEmpty());

  Label end;
  if (option == kBranchRequired) {
    ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
    masm_->b(&end);
  }

  // We want to avoid generating veneer pools too often, so generate veneers
  // for branches that don't immediately require a veneer but will soon go out
  // of range.
  static const size_t kVeneerEmissionMargin = 1 * KBytes;

  for (BranchInfoSetIterator it(&unresolved_branches_); !it.Done();) {
    BranchInfo* branch_info = it.Current();
    if (ShouldEmitVeneer(branch_info->max_reachable_pc_,
                         amount + kVeneerEmissionMargin)) {
      CodeBufferCheckScope scope(masm_,
                                 kVeneerCodeSize,
                                 CodeBufferCheckScope::kCheck,
                                 CodeBufferCheckScope::kExactSize);
      ptrdiff_t branch_pos = branch_info->pc_offset_;
      Instruction* branch = masm_->GetInstructionAt(branch_pos);
      Label* label = branch_info->label_;

      // Patch the branch to point to the current position, and emit a branch
      // to the label.
      Instruction* veneer = masm_->GetCursorAddress<Instruction*>();
      branch->SetImmPCOffsetTarget(veneer);
      {
        ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
        masm_->b(label);
      }

      // Update the label. The patched branch no longer points to it.
      label->DeleteLink(branch_pos);

      it.DeleteCurrentAndAdvance();
    } else {
      it.AdvanceToNextType();
    }
  }

  UpdateNextCheckPoint();

  masm_->bind(&end);
}
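
// For reference, emitting a veneer for an out-of-range `cbz` rewrites the
// code roughly as follows (a sketch):
//
//     cbz x0, #<label>      // patched to:  cbz x0, #<veneer>
//     ...
//   veneer:
//     b label               // unconditional, so a much larger range
//
// The conditional branch only needs to reach the nearby veneer, while the
// unconditional branch in the veneer reaches the distant label.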


MacroAssembler::MacroAssembler(PositionIndependentCodeOption pic)
    : Assembler(pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      fptmp_list_(d31),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
  checkpoint_ = GetNextCheckPoint();
#ifndef VIXL_DEBUG
  USE(allow_macro_instructions_);
#endif
}


MacroAssembler::MacroAssembler(size_t capacity,
                               PositionIndependentCodeOption pic)
    : Assembler(capacity, pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      fptmp_list_(d31),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
  checkpoint_ = GetNextCheckPoint();
}


MacroAssembler::MacroAssembler(byte* buffer,
                               size_t capacity,
                               PositionIndependentCodeOption pic)
    : Assembler(buffer, capacity, pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      fptmp_list_(d31),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
  checkpoint_ = GetNextCheckPoint();
}


MacroAssembler::~MacroAssembler() {}


void MacroAssembler::Reset() {
  Assembler::Reset();

  VIXL_ASSERT(!literal_pool_.IsBlocked());
  literal_pool_.Reset();
  veneer_pool_.Reset();

  checkpoint_ = GetNextCheckPoint();
}


void MacroAssembler::FinalizeCode() {
  if (!literal_pool_.IsEmpty()) literal_pool_.Emit();
  VIXL_ASSERT(veneer_pool_.IsEmpty());

  Assembler::FinalizeCode();
}


void MacroAssembler::CheckEmitFor(size_t amount) {
  CheckEmitPoolsFor(amount);
  GetBuffer()->EnsureSpaceFor(amount);
}


void MacroAssembler::CheckEmitPoolsFor(size_t amount) {
  literal_pool_.CheckEmitFor(amount);
  veneer_pool_.CheckEmitFor(amount);
  checkpoint_ = GetNextCheckPoint();
}


int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
                                        const Register& rd,
                                        uint64_t imm) {
  bool emit_code = (masm != NULL);
  VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits());
  // The worst case for size is mov 64-bit immediate to sp:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to move to sp
  MacroEmissionCheckScope guard(masm);

  // Immediates on AArch64 can be produced using an initial value, and zero to
  // three move-keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move inverted (movn).
  //  3. 64-bit move inverted.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit half words.
  //
  // The code below supports all five initial value generators, and
  // applying move-keep operations to move-zero and move-inverted initial
  // values.
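  //
  // For example (a sketch; the exact choice is made by the code below),
  // 0x00001234abcd0000 could be materialised as:
  //   movz x0, #0xabcd, lsl #16   // Set the first non-zero halfword.
  //   movk x0, #0x1234, lsl #32   // Keep the rest, insert the next halfword.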

  // Try to move the immediate in one instruction, and if that fails, switch to
  // using multiple instructions.
  if (OneInstrMoveImmediateHelper(masm, rd, imm)) {
    return 1;
  } else {
    int instruction_count = 0;
    unsigned reg_size = rd.GetSizeInBits();

    // Generic immediate case. Imm will be represented by
    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    // A move-zero or move-inverted is generated for the first non-zero or
    // non-0xffff immX, and a move-keep for subsequent non-zero immX.

    uint64_t ignored_halfword = 0;
    bool invert_move = false;
    // If the number of 0xffff halfwords is greater than the number of 0x0000
    // halfwords, it's more efficient to use move-inverted.
    if (CountClearHalfWords(~imm, reg_size) >
        CountClearHalfWords(imm, reg_size)) {
      ignored_halfword = 0xffff;
      invert_move = true;
    }

    // Mov instructions can't move values into the stack pointer, so set up a
    // temporary register, if needed.
    UseScratchRegisterScope temps;
    Register temp;
    if (emit_code) {
      temps.Open(masm);
      temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
    }

    // Iterate through the halfwords. Use movn/movz for the first non-ignored
    // halfword, and movk for subsequent halfwords.
    VIXL_ASSERT((reg_size % 16) == 0);
    bool first_mov_done = false;
    for (unsigned i = 0; i < (reg_size / 16); i++) {
      uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
      if (imm16 != ignored_halfword) {
        if (!first_mov_done) {
          if (invert_move) {
            if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i);
            instruction_count++;
          } else {
            if (emit_code) masm->movz(temp, imm16, 16 * i);
            instruction_count++;
          }
          first_mov_done = true;
        } else {
          // Construct a wider constant.
          if (emit_code) masm->movk(temp, imm16, 16 * i);
          instruction_count++;
        }
      }
    }

    VIXL_ASSERT(first_mov_done);

    // Move the temporary if the original destination register was the stack
    // pointer.
    if (rd.IsSP()) {
      if (emit_code) masm->mov(rd, temp);
      instruction_count++;
    }
    return instruction_count;
  }
}
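
// Note that passing `masm == NULL` to `MoveImmediateHelper` performs a dry
// run: no code is emitted, and the return value is the number of instructions
// that would have been generated, which callers can use to size emission
// scopes.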


bool MacroAssembler::OneInstrMoveImmediateHelper(MacroAssembler* masm,
                                                 const Register& dst,
                                                 int64_t imm) {
  bool emit_code = masm != NULL;
  unsigned n, imm_s, imm_r;
  int reg_size = dst.GetSizeInBits();

  if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
    // Immediate can be represented in a move zero instruction. Movz can't
    // write to the stack pointer.
    if (emit_code) {
      masm->movz(dst, imm);
    }
    return true;
  } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
    // Immediate can be represented in a move negative instruction. Movn can't
    // write to the stack pointer.
    if (emit_code) {
      masm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
    }
    return true;
  } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
    // Immediate can be represented in a logical orr instruction.
    VIXL_ASSERT(!dst.IsZero());
    if (emit_code) {
      masm->LogicalImmediate(dst,
                             AppropriateZeroRegFor(dst),
                             n,
                             imm_s,
                             imm_r,
                             ORR);
    }
    return true;
  }
  return false;
}
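
// For example (a sketch of each single-instruction case above):
//   0x0000000012340000 -> movz x0, #0x1234, lsl #16
//   0xffffffffffff5678 -> movn x0, #0xa987             // ~0xa987 == ...5678.
//   0x00ff00ff00ff00ff -> orr x0, xzr, #0x00ff00ff00ff00ff  // Bitmask form.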


void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
  VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
              ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
  if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
    B(static_cast<Condition>(type), label);
  } else {
    switch (type) {
      case always:
        B(label);
        break;
      case never:
        break;
      case reg_zero:
        Cbz(reg, label);
        break;
      case reg_not_zero:
        Cbnz(reg, label);
        break;
      case reg_bit_clear:
        Tbz(reg, bit, label);
        break;
      case reg_bit_set:
        Tbnz(reg, bit, label);
        break;
      default:
        VIXL_UNREACHABLE();
    }
  }
}


void MacroAssembler::B(Label* label) {
  SingleEmissionCheckScope guard(this);
  b(label);
}


void MacroAssembler::B(Label* label, Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT((cond != al) && (cond != nv));
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    b(&done, InvertCondition(cond));
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CondBranchType);
    }
    b(label, cond);
  }
}
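
// For example, when `label` is already bound but out of conditional-branch
// range, `B(&label, eq)` above expands to (a sketch):
//     b.ne done    // Inverted condition, skipping the long branch.
//     b label      // Unconditional branch, with a much larger range.
//   done: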


void MacroAssembler::Cbnz(const Register& rt, Label* label) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    cbz(rt, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CompareBranchType);
    }
    cbnz(rt, label);
  }
}


void MacroAssembler::Cbz(const Register& rt, Label* label) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    cbnz(rt, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CompareBranchType);
    }
    cbz(rt, label);
  }
}


void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
    Label done;
    tbz(rt, bit_pos, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            TestBranchType);
    }
    tbnz(rt, bit_pos, label);
  }
}


void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
    Label done;
    tbnz(rt, bit_pos, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            TestBranchType);
    }
    tbz(rt, bit_pos, label);
  }
}


void MacroAssembler::Bind(Label* label) {
  VIXL_ASSERT(allow_macro_instructions_);
  veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
  bind(label);
}


// Bind a label to a specified offset from the start of the buffer.
void MacroAssembler::BindToOffset(Label* label, ptrdiff_t offset) {
  VIXL_ASSERT(allow_macro_instructions_);
  veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
  Assembler::BindToOffset(label, offset);
}


void MacroAssembler::And(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, AND);
}


void MacroAssembler::Ands(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ANDS);
}


void MacroAssembler::Tst(const Register& rn, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Ands(AppropriateZeroRegFor(rn), rn, operand);
}


void MacroAssembler::Bic(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, BIC);
}


void MacroAssembler::Bics(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, BICS);
}


void MacroAssembler::Orr(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORR);
}


void MacroAssembler::Orn(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORN);
}


void MacroAssembler::Eor(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EOR);
}


void MacroAssembler::Eon(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EON);
}


void MacroAssembler::LogicalMacro(const Register& rd,
                                  const Register& rn,
                                  const Operand& operand,
                                  LogicalOp op) {
  // The worst case for size is logical immediate to sp:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to do the operation
  //  * 1 instruction to move to sp
  MacroEmissionCheckScope guard(this);
  UseScratchRegisterScope temps(this);

  if (operand.IsImmediate()) {
    int64_t immediate = operand.GetImmediate();
    unsigned reg_size = rd.GetSizeInBits();

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = ~immediate;
    }

    // Ignore the top 32 bits of an immediate if we're moving to a W register.
    if (rd.Is32Bits()) {
      // Check that the top 32 bits are consistent.
      VIXL_ASSERT(((immediate >> kWRegSize) == 0) ||
                  ((immediate >> kWRegSize) == -1));
      immediate &= kWRegMask;
    }

    VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));

    // Special cases for all set or all clear immediates.
    if (immediate == 0) {
      switch (op) {
        case AND:
          Mov(rd, 0);
          return;
        case ORR:
          VIXL_FALLTHROUGH();
        case EOR:
          Mov(rd, rn);
          return;
        case ANDS:
          VIXL_FALLTHROUGH();
        case BICS:
          break;
        default:
          VIXL_UNREACHABLE();
      }
    } else if ((rd.Is64Bits() && (immediate == -1)) ||
               (rd.Is32Bits() && (immediate == 0xffffffff))) {
      switch (op) {
        case AND:
          Mov(rd, rn);
          return;
        case ORR:
          Mov(rd, immediate);
          return;
        case EOR:
          Mvn(rd, rn);
          return;
        case ANDS:
          VIXL_FALLTHROUGH();
        case BICS:
          break;
        default:
          VIXL_UNREACHABLE();
      }
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // Immediate can't be encoded: synthesize using move immediate.
      Register temp = temps.AcquireSameSizeAs(rn);
      Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate);

      if (rd.Is(sp)) {
        // If rd is the stack pointer we cannot use it as the destination
        // register so we use the temp register as an intermediate again.
        Logical(temp, rn, imm_operand, op);
        Mov(sp, temp);
      } else {
        Logical(rd, rn, imm_operand, op);
      }
    }
  } else if (operand.IsExtendedRegister()) {
    VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
    // Add/sub extended supports shift <= 4. We want to support exactly the
    // same modes here.
    VIXL_ASSERT(operand.GetShiftAmount() <= 4);
    VIXL_ASSERT(
        operand.GetRegister().Is64Bits() ||
        ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));

    temps.Exclude(operand.GetRegister());
    Register temp = temps.AcquireSameSizeAs(rn);
    EmitExtendShift(temp,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    Logical(rd, rn, Operand(temp), op);
  } else {
    // The operand can be encoded in the instruction.
    VIXL_ASSERT(operand.IsShiftedRegister());
    Logical(rd, rn, operand, op);
  }
}
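
// For example (a sketch), `And(w0, w1, 0xbeef)` cannot encode 0xbeef as a
// logical immediate, so the path above synthesises it first, roughly:
//     movz w16, #0xbeef       // w16 is a scratch register (ip0).
//     and  w0, w1, w16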


void MacroAssembler::Mov(const Register& rd,
                         const Operand& operand,
                         DiscardMoveMode discard_mode) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is mov immediate with up to 4 instructions.
  MacroEmissionCheckScope guard(this);

  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(rd, operand.GetImmediate());
  } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
    // Emit a shift instruction if moving a shifted register. This operation
    // could also be achieved using an orr instruction (like orn used by Mvn),
    // but using a shift instruction makes the disassembly clearer.
    EmitShift(rd,
              operand.GetRegister(),
              operand.GetShift(),
              operand.GetShiftAmount());
  } else if (operand.IsExtendedRegister()) {
    // Emit an extend instruction if moving an extended register. This handles
    // extend with post-shift operations, too.
    EmitExtendShift(rd,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
  } else {
    Mov(rd, operand.GetRegister(), discard_mode);
  }
}


void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
  VIXL_ASSERT(IsUint16(imm));
  int byte1 = (imm & 0xff);
  int byte2 = ((imm >> 8) & 0xff);
  if (byte1 == byte2) {
    movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
  } else if (byte1 == 0) {
    movi(vd, byte2, LSL, 8);
  } else if (byte2 == 0) {
    movi(vd, byte1);
  } else if (byte1 == 0xff) {
    mvni(vd, ~byte2 & 0xff, LSL, 8);
  } else if (byte2 == 0xff) {
    mvni(vd, ~byte1 & 0xff);
  } else {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    movz(temp, imm);
    dup(vd, temp);
  }
}
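
// For example (a sketch of the cases above, for a 128-bit destination):
//   imm == 0x4141 -> movi vd.16b, #0x41
//   imm == 0x4100 -> movi vd, #0x41, lsl #8
//   imm == 0x41ff -> mvni vd, #0xbe, lsl #8    // ~0xbe00 == 0x41ff per lane.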


void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
  VIXL_ASSERT(IsUint32(imm));

  uint8_t bytes[sizeof(imm)];
  memcpy(bytes, &imm, sizeof(imm));

  // All bytes are either 0x00 or 0xff.
  {
    bool all0orff = true;
    for (int i = 0; i < 4; ++i) {
      if ((bytes[i] != 0) && (bytes[i] != 0xff)) {
        all0orff = false;
        break;
      }
    }

    if (all0orff) {
      movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
      return;
    }
  }

  // Of the 4 bytes, only one byte is non-zero.
  for (int i = 0; i < 4; i++) {
    // Use an unsigned constant: `0xff << 24` would overflow a signed int.
    if ((imm & (UINT64_C(0xff) << (i * 8))) == imm) {
      movi(vd, bytes[i], LSL, i * 8);
      return;
    }
  }

  // Of the 4 bytes, only one byte is not 0xff.
  for (int i = 0; i < 4; i++) {
    uint32_t mask = ~(UINT32_C(0xff) << (i * 8));
    if ((imm & mask) == mask) {
      mvni(vd, ~bytes[i] & 0xff, LSL, i * 8);
      return;
    }
  }

  // Immediate is of the form 0x00MMFFFF.
  if ((imm & 0xff00ffff) == 0x0000ffff) {
    movi(vd, bytes[2], MSL, 16);
    return;
  }

  // Immediate is of the form 0x0000MMFF.
  if ((imm & 0xffff00ff) == 0x000000ff) {
    movi(vd, bytes[1], MSL, 8);
    return;
  }

  // Immediate is of the form 0xFFMM0000.
  if ((imm & 0xff00ffff) == 0xff000000) {
    mvni(vd, ~bytes[2] & 0xff, MSL, 16);
    return;
  }
  // Immediate is of the form 0xFFFFMM00.
  if ((imm & 0xffff00ff) == 0xffff0000) {
    mvni(vd, ~bytes[1] & 0xff, MSL, 8);
    return;
  }

  // Top and bottom 16 bits are equal.
  if (((imm >> 16) & 0xffff) == (imm & 0xffff)) {
    Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    Mov(temp, imm);
    dup(vd, temp);
  }
}
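
// For example (a sketch of the MSL cases above):
//   imm == 0x00abffff -> movi vd, #0xab, msl #16   // MSL shifts in ones.
//   imm == 0xffab0000 -> mvni vd, #0x54, msl #16   // ~0x0054ffff == 0xffab0000.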


void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
  // All bytes are either 0x00 or 0xff.
  {
    bool all0orff = true;
    for (int i = 0; i < 8; ++i) {
      int byteval = (imm >> (i * 8)) & 0xff;
      if (byteval != 0 && byteval != 0xff) {
        all0orff = false;
        break;
      }
    }
    if (all0orff) {
      movi(vd, imm);
      return;
    }
  }

  // Top and bottom 32 bits are equal.
  if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) {
    Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    Mov(temp, imm);
    if (vd.Is1D()) {
      mov(vd.D(), 0, temp);
    } else {
      dup(vd.V2D(), temp);
    }
  }
}
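
// For example, imm == 0x00ff00ff00ff00ff consists only of 0x00 and 0xff
// bytes, so the first case above can use movi's 64-bit byte-mask form, a
// single instruction (a sketch):
//     movi vd.2d, #0x00ff00ff00ff00ff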


void MacroAssembler::Movi(const VRegister& vd,
                          uint64_t imm,
                          Shift shift,
                          int shift_amount) {
  VIXL_ASSERT(allow_macro_instructions_);
  MacroEmissionCheckScope guard(this);
  if (shift_amount != 0 || shift != LSL) {
    movi(vd, imm, shift, shift_amount);
  } else if (vd.Is8B() || vd.Is16B()) {
    // 8-bit immediate.
    VIXL_ASSERT(IsUint8(imm));
    movi(vd, imm);
  } else if (vd.Is4H() || vd.Is8H()) {
    // 16-bit immediate.
    Movi16bitHelper(vd, imm);
  } else if (vd.Is2S() || vd.Is4S()) {
    // 32-bit immediate.
    Movi32bitHelper(vd, imm);
  } else {
    // 64-bit immediate.
    Movi64bitHelper(vd, imm);
  }
}


void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
  // TODO: Move 128-bit values in a more efficient way.
  VIXL_ASSERT(vd.Is128Bits());
  UseScratchRegisterScope temps(this);
  Movi(vd.V2D(), lo);
  Register temp = temps.AcquireX();
  Mov(temp, hi);
  Ins(vd.V2D(), 1, temp);
}


void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is mvn immediate with up to 4 instructions.
  MacroEmissionCheckScope guard(this);

  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mvn(rd, operand.GetImmediate());
  } else if (operand.IsExtendedRegister()) {
    UseScratchRegisterScope temps(this);
    temps.Exclude(operand.GetRegister());

    // Emit two instructions for the extend case. This differs from Mov, as
    // the extend and invert can't be achieved in one instruction.
    Register temp = temps.AcquireSameSizeAs(rd);
    EmitExtendShift(temp,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    mvn(rd, Operand(temp));
  } else {
    // Otherwise, register and shifted register cases can be handled by the
    // assembler directly, using orn.
    mvn(rd, operand);
  }
}


void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  MoveImmediateHelper(this, rd, imm);
}


void MacroAssembler::Ccmp(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMN);
  } else {
    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
  }
}


void MacroAssembler::Ccmn(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMP);
  } else {
    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
  }
}


void MacroAssembler::ConditionalCompareMacro(const Register& rn,
                                             const Operand& operand,
                                             StatusFlags nzcv,
                                             Condition cond,
                                             ConditionalCompareOp op) {
  VIXL_ASSERT((cond != al) && (cond != nv));
  // The worst case for size is ccmp immediate:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction for ccmp
  MacroEmissionCheckScope guard(this);

  if ((operand.IsShiftedRegister() && (operand.GetShiftAmount() == 0)) ||
      (operand.IsImmediate() &&
       IsImmConditionalCompare(operand.GetImmediate()))) {
    // The immediate can be encoded in the instruction, or the operand is an
    // unshifted register: call the assembler.
    ConditionalCompare(rn, operand, nzcv, cond, op);
  } else {
    UseScratchRegisterScope temps(this);
    // The operand isn't directly supported by the instruction: perform the
    // operation on a temporary register.
    Register temp = temps.AcquireSameSizeAs(rn);
    Mov(temp, operand);
    ConditionalCompare(rn, temp, nzcv, cond, op);
  }
}
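
// For example (a sketch), `ccmp` only encodes a 5-bit unsigned immediate, so
// `Ccmp(x0, 42, NoFlag, eq)` cannot use the immediate form and becomes
// roughly:
//     movz x16, #42
//     ccmp x0, x16, #0, eq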


void MacroAssembler::CselHelper(MacroAssembler* masm,
                                const Register& rd,
                                Operand left,
                                Operand right,
                                Condition cond,
                                bool* should_synthesise_left,
                                bool* should_synthesise_right) {
  bool emit_code = (masm != NULL);

  VIXL_ASSERT(!emit_code || masm->allow_macro_instructions_);
  VIXL_ASSERT((cond != al) && (cond != nv));
  VIXL_ASSERT(!rd.IsZero() && !rd.IsSP());
  VIXL_ASSERT(left.IsImmediate() || !left.GetRegister().IsSP());
  VIXL_ASSERT(right.IsImmediate() || !right.GetRegister().IsSP());

  if (should_synthesise_left != NULL) *should_synthesise_left = false;
  if (should_synthesise_right != NULL) *should_synthesise_right = false;

  // The worst case for size occurs when the inputs are two non-encodable
  // constants:
  //  * up to 4 instructions to materialise the left constant
  //  * up to 4 instructions to materialise the right constant
  //  * 1 instruction for csel
  EmissionCheckScope guard(masm, 9 * kInstructionSize);
  UseScratchRegisterScope temps;
  if (masm != NULL) {
    temps.Open(masm);
  }

  // Try to handle cases where both inputs are immediates.
  bool left_is_immediate = left.IsImmediate() || left.IsZero();
  bool right_is_immediate = right.IsImmediate() || right.IsZero();
  if (left_is_immediate && right_is_immediate &&
      CselSubHelperTwoImmediates(masm,
                                 rd,
                                 left.GetEquivalentImmediate(),
                                 right.GetEquivalentImmediate(),
                                 cond,
                                 should_synthesise_left,
                                 should_synthesise_right)) {
    return;
  }

  // Handle cases where one of the two inputs is -1, 0, or 1.
  bool left_is_small_immediate =
      left_is_immediate && ((-1 <= left.GetEquivalentImmediate()) &&
                            (left.GetEquivalentImmediate() <= 1));
  bool right_is_small_immediate =
      right_is_immediate && ((-1 <= right.GetEquivalentImmediate()) &&
                             (right.GetEquivalentImmediate() <= 1));
  if (right_is_small_immediate || left_is_small_immediate) {
    bool swapped_inputs = false;
    if (!right_is_small_immediate) {
      std::swap(left, right);
      cond = InvertCondition(cond);
      swapped_inputs = true;
    }
    CselSubHelperRightSmallImmediate(masm,
                                     &temps,
                                     rd,
                                     left,
                                     right,
                                     cond,
                                     swapped_inputs ? should_synthesise_right
                                                    : should_synthesise_left);
    return;
  }

  // Otherwise both inputs need to be available in registers. Synthesise them
  // if necessary and emit the `csel`.
  if (!left.IsPlainRegister()) {
    if (emit_code) {
      Register temp = temps.AcquireSameSizeAs(rd);
      masm->Mov(temp, left);
      left = temp;
    }
    if (should_synthesise_left != NULL) *should_synthesise_left = true;
  }
  if (!right.IsPlainRegister()) {
    if (emit_code) {
      Register temp = temps.AcquireSameSizeAs(rd);
      masm->Mov(temp, right);
      right = temp;
    }
    if (should_synthesise_right != NULL) *should_synthesise_right = true;
  }
  if (emit_code) {
    VIXL_ASSERT(left.IsPlainRegister() && right.IsPlainRegister());
    if (left.GetRegister().Is(right.GetRegister())) {
      masm->Mov(rd, left.GetRegister());
    } else {
      masm->csel(rd, left.GetRegister(), right.GetRegister(), cond);
    }
  }
}
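
// For example (a sketch), a call with `left == x1`, `right == Operand(1)` and
// `cond == eq` takes the small-right-immediate path above and emits a single
// instruction:
//     csinc x0, x1, xzr, eq   // x0 = (eq) ? x1 : (xzr + 1)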


bool MacroAssembler::CselSubHelperTwoImmediates(MacroAssembler* masm,
                                                const Register& rd,
                                                int64_t left,
                                                int64_t right,
                                                Condition cond,
                                                bool* should_synthesise_left,
                                                bool* should_synthesise_right) {
  bool emit_code = (masm != NULL);
  if (should_synthesise_left != NULL) *should_synthesise_left = false;
  if (should_synthesise_right != NULL) *should_synthesise_right = false;

  if (left == right) {
    if (emit_code) masm->Mov(rd, left);
    return true;
  } else if (left == -right) {
    if (should_synthesise_right != NULL) *should_synthesise_right = true;
    if (emit_code) {
      masm->Mov(rd, right);
      masm->Cneg(rd, rd, cond);
    }
    return true;
  }

  if (CselSubHelperTwoOrderedImmediates(masm, rd, left, right, cond)) {
    return true;
  } else {
    std::swap(left, right);
    if (CselSubHelperTwoOrderedImmediates(masm,
                                          rd,
                                          left,
                                          right,
                                          InvertCondition(cond))) {
      return true;
    }
  }

  // TODO: Handle more situations. For example handle `csel rd, #5, #6, cond`
  // with `cinc`.
  return false;
}


bool MacroAssembler::CselSubHelperTwoOrderedImmediates(MacroAssembler* masm,
                                                       const Register& rd,
                                                       int64_t left,
                                                       int64_t right,
                                                       Condition cond) {
  bool emit_code = (masm != NULL);

  if ((left == 1) && (right == 0)) {
    if (emit_code) masm->cset(rd, cond);
    return true;
  } else if ((left == -1) && (right == 0)) {
    if (emit_code) masm->csetm(rd, cond);
    return true;
  }
  return false;
}


void MacroAssembler::CselSubHelperRightSmallImmediate(
    MacroAssembler* masm,
    UseScratchRegisterScope* temps,
    const Register& rd,
    const Operand& left,
    const Operand& right,
    Condition cond,
    bool* should_synthesise_left) {
  bool emit_code = (masm != NULL);
  VIXL_ASSERT((right.IsImmediate() || right.IsZero()) &&
              (-1 <= right.GetEquivalentImmediate()) &&
              (right.GetEquivalentImmediate() <= 1));
  Register left_register;

  if (left.IsPlainRegister()) {
    left_register = left.GetRegister();
  } else {
    if (emit_code) {
      left_register = temps->AcquireSameSizeAs(rd);
      masm->Mov(left_register, left);
    }
    if (should_synthesise_left != NULL) *should_synthesise_left = true;
  }
  if (emit_code) {
    int64_t imm = right.GetEquivalentImmediate();
    Register zr = AppropriateZeroRegFor(rd);
    if (imm == 0) {
      masm->csel(rd, left_register, zr, cond);
    } else if (imm == 1) {
      masm->csinc(rd, left_register, zr, cond);
    } else {
      VIXL_ASSERT(imm == -1);
      masm->csinv(rd, left_register, zr, cond);
    }
  }
}


void MacroAssembler::Add(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
      IsImmAddSub(-operand.GetImmediate())) {
    AddSubMacro(rd, rn, -operand.GetImmediate(), S, SUB);
  } else {
    AddSubMacro(rd, rn, operand, S, ADD);
  }
}


void MacroAssembler::Adds(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  Add(rd, rn, operand, SetFlags);
}


void MacroAssembler::Sub(const Register& rd,
                         const Register& rn,
                         const Operand& operand,
                         FlagsUpdate S) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
      IsImmAddSub(-operand.GetImmediate())) {
    AddSubMacro(rd, rn, -operand.GetImmediate(), S, ADD);
  } else {
    AddSubMacro(rd, rn, operand, S, SUB);
  }
}


void MacroAssembler::Subs(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  Sub(rd, rn, operand, SetFlags);
}


void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Adds(AppropriateZeroRegFor(rn), rn, operand);
}


void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Subs(AppropriateZeroRegFor(rn), rn, operand);
}


void MacroAssembler::Fcmp(const FPRegister& fn,
                          double value,
                          FPTrapFlags trap) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is:
  //  * 1 instruction to materialise the constant, using the literal pool if
  //    necessary
  //  * 1 instruction for fcmp{e}
   1421   MacroEmissionCheckScope guard(this);
   1422   if (value != 0.0) {
   1423     UseScratchRegisterScope temps(this);
   1424     FPRegister tmp = temps.AcquireSameSizeAs(fn);
   1425     Fmov(tmp, value);
   1426     FPCompareMacro(fn, tmp, trap);
   1427   } else {
   1428     FPCompareMacro(fn, value, trap);
   1429   }
   1430 }
   1431 
   1432 
   1433 void MacroAssembler::Fcmpe(const FPRegister& fn, double value) {
   1434   Fcmp(fn, value, EnableTrap);
   1435 }


void MacroAssembler::Fmov(VRegister vd, double imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  // Floating point immediates are loaded through the literal pool.
  MacroEmissionCheckScope guard(this);

  if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
    Fmov(vd, static_cast<float>(imm));
    return;
  }

  VIXL_ASSERT(vd.Is1D() || vd.Is2D());
  if (IsImmFP64(imm)) {
    fmov(vd, imm);
  } else {
    uint64_t rawbits = DoubleToRawbits(imm);
    if (vd.IsScalar()) {
      if (rawbits == 0) {
        fmov(vd, xzr);
      } else {
        ldr(vd,
            new Literal<double>(imm,
                                &literal_pool_,
                                RawLiteral::kDeletedOnPlacementByPool));
      }
    } else {
      // TODO: consider NEON support for load literal.
      Movi(vd, rawbits);
    }
  }
}


void MacroAssembler::Fmov(VRegister vd, float imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  // Floating point immediates are loaded through the literal pool.
  MacroEmissionCheckScope guard(this);

  if (vd.Is1D() || vd.Is2D()) {
    Fmov(vd, static_cast<double>(imm));
    return;
  }

  VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S());
  if (IsImmFP32(imm)) {
    fmov(vd, imm);
  } else {
    uint32_t rawbits = FloatToRawbits(imm);
    if (vd.IsScalar()) {
      if (rawbits == 0) {
        fmov(vd, wzr);
      } else {
        ldr(vd,
            new Literal<float>(imm,
                               &literal_pool_,
                               RawLiteral::kDeletedOnPlacementByPool));
      }
    } else {
      // TODO: consider NEON support for load literal.
      Movi(vd, rawbits);
    }
  }
}
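
// Added note: IsImmFP32/IsImmFP64 accept only the fmov-encodable set, values
// of the form +/-(16..31)/16 * 2^n with n in [-3, 4] (e.g. 0.5, 1.0, 31.0).
// Anything else, such as 0.1, takes the literal-pool or Movi path above.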


void MacroAssembler::Neg(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate()) {
    Mov(rd, -operand.GetImmediate());
  } else {
    Sub(rd, AppropriateZeroRegFor(rd), operand);
  }
}


void MacroAssembler::Negs(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Subs(rd, AppropriateZeroRegFor(rd), operand);
}


bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
                                              int64_t imm) {
  return OneInstrMoveImmediateHelper(this, dst, imm);
}


Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst,
                                                  int64_t imm) {
  int reg_size = dst.GetSizeInBits();

  // Encode the immediate in a single move instruction, if possible.
  if (TryOneInstrMoveImmediate(dst, imm)) {
    // The move was successful; nothing to do here.
  } else {
    // Pre-shift the immediate to the least-significant bits of the register.
    int shift_low = CountTrailingZeros(imm, reg_size);
    int64_t imm_low = imm >> shift_low;

    // Pre-shift the immediate to the most-significant bits of the register,
    // inserting set bits in the least-significant bits.
    int shift_high = CountLeadingZeros(imm, reg_size);
    int64_t imm_high = (imm << shift_high) | ((INT64_C(1) << shift_high) - 1);

    if (TryOneInstrMoveImmediate(dst, imm_low)) {
      // The new immediate has been moved into the destination's low bits:
      // return a new leftward-shifting operand.
      return Operand(dst, LSL, shift_low);
    } else if (TryOneInstrMoveImmediate(dst, imm_high)) {
      // The new immediate has been moved into the destination's high bits:
      // return a new rightward-shifting operand.
      return Operand(dst, LSR, shift_high);
    } else {
      Mov(dst, imm);
    }
  }
  return Operand(dst);
}
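
// Added worked example: imm = 0x123400 is not encodable by a single
// movz/movn/orr, but it has 10 trailing zeros and (0x123400 >> 10) == 0x48d
// fits a movz. The helper therefore emits 'movz dst, #0x48d' and returns
// Operand(dst, LSL, 10), which the caller folds into the add/sub itself.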


void MacroAssembler::Move(const GenericOperand& dst,
                          const GenericOperand& src) {
  if (dst.Equals(src)) {
    return;
  }

  VIXL_ASSERT(dst.IsValid() && src.IsValid());

  // The sizes of the operands must match exactly.
  VIXL_ASSERT(dst.GetSizeInBits() == src.GetSizeInBits());
  VIXL_ASSERT(dst.GetSizeInBits() <= kXRegSize);
  int operand_size = static_cast<int>(dst.GetSizeInBits());

  if (dst.IsCPURegister() && src.IsCPURegister()) {
    CPURegister dst_reg = dst.GetCPURegister();
    CPURegister src_reg = src.GetCPURegister();
    if (dst_reg.IsRegister() && src_reg.IsRegister()) {
      Mov(Register(dst_reg), Register(src_reg));
    } else if (dst_reg.IsVRegister() && src_reg.IsVRegister()) {
      Fmov(VRegister(dst_reg), VRegister(src_reg));
    } else {
      if (dst_reg.IsRegister()) {
        Fmov(Register(dst_reg), VRegister(src_reg));
      } else {
        Fmov(VRegister(dst_reg), Register(src_reg));
      }
    }
    return;
  }

  if (dst.IsMemOperand() && src.IsMemOperand()) {
    UseScratchRegisterScope temps(this);
    CPURegister temp = temps.AcquireCPURegisterOfSize(operand_size);
    Ldr(temp, src.GetMemOperand());
    Str(temp, dst.GetMemOperand());
    return;
  }

  if (dst.IsCPURegister()) {
    Ldr(dst.GetCPURegister(), src.GetMemOperand());
  } else {
    Str(src.GetCPURegister(), dst.GetMemOperand());
  }
}


void MacroAssembler::ComputeAddress(const Register& dst,
                                    const MemOperand& mem_op) {
  // We cannot handle pre-indexing or post-indexing.
  VIXL_ASSERT(mem_op.GetAddrMode() == Offset);
  Register base = mem_op.GetBaseRegister();
  if (mem_op.IsImmediateOffset()) {
    Add(dst, base, mem_op.GetOffset());
  } else {
    VIXL_ASSERT(mem_op.IsRegisterOffset());
    Register reg_offset = mem_op.GetRegisterOffset();
    Shift shift = mem_op.GetShift();
    Extend extend = mem_op.GetExtend();
    if (shift == NO_SHIFT) {
      VIXL_ASSERT(extend != NO_EXTEND);
      Add(dst, base, Operand(reg_offset, extend, mem_op.GetShiftAmount()));
    } else {
      VIXL_ASSERT(extend == NO_EXTEND);
      Add(dst, base, Operand(reg_offset, shift, mem_op.GetShiftAmount()));
    }
  }
}
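
// Added illustration: materialise an effective address without touching
// memory, e.g. for a scaled array index:
//   __ ComputeAddress(x0, MemOperand(x1, x2, LSL, 2));
//   // emits: add x0, x1, x2, lsl #2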


void MacroAssembler::AddSubMacro(const Register& rd,
                                 const Register& rn,
                                 const Operand& operand,
                                 FlagsUpdate S,
                                 AddSubOp op) {
  // Worst case is add/sub immediate:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction for add/sub
  MacroEmissionCheckScope guard(this);

  if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
      (S == LeaveFlags)) {
    // The instruction would be a nop. Avoid generating useless code.
    return;
  }

  if ((operand.IsImmediate() && !IsImmAddSub(operand.GetImmediate())) ||
      (rn.IsZero() && !operand.IsShiftedRegister()) ||
      (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireSameSizeAs(rn);
    if (operand.IsImmediate()) {
      Operand imm_operand =
          MoveImmediateForShiftedOp(temp, operand.GetImmediate());
      AddSub(rd, rn, imm_operand, S, op);
    } else {
      Mov(temp, operand);
      AddSub(rd, rn, temp, S, op);
    }
  } else {
    AddSub(rd, rn, operand, S, op);
  }
}
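
// Added illustration of the fall-back path: an immediate outside the 12-bit
// (optionally shifted) add/sub range is materialised into a scratch register
// first (ip0/x16 by default, an assumption about the scratch list):
//   __ Add(x0, x1, 0x1234567);
//   // emits: movz x16, #0x4567; movk x16, #0x123, lsl #16
//   //        add x0, x1, x16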


void MacroAssembler::Adc(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC);
}


void MacroAssembler::Adcs(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC);
}


void MacroAssembler::Sbc(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC);
}


void MacroAssembler::Sbcs(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC);
}


void MacroAssembler::Ngc(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Register zr = AppropriateZeroRegFor(rd);
  Sbc(rd, zr, operand);
}


void MacroAssembler::Ngcs(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Register zr = AppropriateZeroRegFor(rd);
  Sbcs(rd, zr, operand);
}


void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
                                          const Register& rn,
                                          const Operand& operand,
                                          FlagsUpdate S,
                                          AddSubWithCarryOp op) {
  VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits());
  // Worst case is adc/sbc immediate:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction for adc/sbc
  MacroEmissionCheckScope guard(this);
  UseScratchRegisterScope temps(this);

  if (operand.IsImmediate() ||
      (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
    // Add/sub with carry (immediate or ROR shifted register).
    Register temp = temps.AcquireSameSizeAs(rn);
    Mov(temp, operand);
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
    // Add/sub with carry (shifted register).
    VIXL_ASSERT(operand.GetRegister().GetSizeInBits() == rd.GetSizeInBits());
    VIXL_ASSERT(operand.GetShift() != ROR);
    VIXL_ASSERT(
        IsUintN(rd.GetSizeInBits() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
                operand.GetShiftAmount()));
    temps.Exclude(operand.GetRegister());
    Register temp = temps.AcquireSameSizeAs(rn);
    EmitShift(temp,
              operand.GetRegister(),
              operand.GetShift(),
              operand.GetShiftAmount());
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else if (operand.IsExtendedRegister()) {
    // Add/sub with carry (extended register).
    VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
    // Add/sub extended supports a shift <= 4. We want to support exactly the
    // same modes.
    VIXL_ASSERT(operand.GetShiftAmount() <= 4);
    VIXL_ASSERT(
        operand.GetRegister().Is64Bits() ||
        ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
    temps.Exclude(operand.GetRegister());
    Register temp = temps.AcquireSameSizeAs(rn);
    EmitExtendShift(temp,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    AddSubWithCarry(rd, rn, Operand(temp), S, op);
  } else {
    // The addressing mode is directly supported by the instruction.
    AddSubWithCarry(rd, rn, operand, S, op);
  }
}
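
// Added illustration: adc/sbc accept only plain register operands, so other
// forms are legalised through a scratch register first (ip0/x16 assumed as
// the default scratch):
//   __ Adc(x0, x1, Operand(x2, LSL, 4));
//   // emits: lsl x16, x2, #4
//   //        adc x0, x1, x16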


#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                          \
  void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
    VIXL_ASSERT(allow_macro_instructions_);                            \
    LoadStoreMacro(REG, addr, OP);                                     \
  }
LS_MACRO_LIST(DEFINE_FUNCTION)
#undef DEFINE_FUNCTION


void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
                                    const MemOperand& addr,
                                    LoadStoreOp op) {
  // Worst case is ldr/str pre/post index:
  //  * 1 instruction for ldr/str
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to update the base
  MacroEmissionCheckScope guard(this);

  int64_t offset = addr.GetOffset();
  unsigned access_size = CalcLSDataSize(op);

  // Check if an immediate offset fits in the immediate field of the
  // appropriate instruction. If not, emit two instructions to perform
  // the operation.
  if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) &&
      !IsImmLSUnscaled(offset)) {
    // Immediate offset that can't be encoded using unsigned or unscaled
    // addressing modes.
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
    Mov(temp, addr.GetOffset());
    LoadStore(rt, MemOperand(addr.GetBaseRegister(), temp), op);
  } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
    // Post-index beyond unscaled addressing range.
    LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
    Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
  } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
    // Pre-index beyond unscaled addressing range.
    Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
    LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
  } else {
    // Encodable in one load/store instruction.
    LoadStore(rt, addr, op);
  }
}
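
// Added illustration: an offset that fits neither the scaled unsigned nor
// the signed 9-bit unscaled form is moved to a scratch register and used as
// a register offset (x16 assumed as the default scratch):
//   __ Ldr(x0, MemOperand(x1, 0x123456));
//   // emits: mov x16, #0x123456  (movz/movk)
//   //        ldr x0, [x1, x16]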


#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
  void MacroAssembler::FN(const REGTYPE REG,        \
                          const REGTYPE REG2,       \
                          const MemOperand& addr) { \
    VIXL_ASSERT(allow_macro_instructions_);         \
    LoadStorePairMacro(REG, REG2, addr, OP);        \
  }
LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
#undef DEFINE_FUNCTION

void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
                                        const CPURegister& rt2,
                                        const MemOperand& addr,
                                        LoadStorePairOp op) {
  // TODO(all): Should we support register offset for load-store-pair?
  VIXL_ASSERT(!addr.IsRegisterOffset());
  // Worst case is ldp/stp immediate:
  //  * 1 instruction for ldp/stp
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to update the base
  MacroEmissionCheckScope guard(this);

  int64_t offset = addr.GetOffset();
  unsigned access_size = CalcLSPairDataSize(op);

  // Check if the offset fits in the immediate field of the appropriate
  // instruction. If not, emit two instructions to perform the operation.
  if (IsImmLSPair(offset, access_size)) {
    // Encodable in one load/store pair instruction.
    LoadStorePair(rt, rt2, addr, op);
  } else {
    Register base = addr.GetBaseRegister();
    if (addr.IsImmediateOffset()) {
      UseScratchRegisterScope temps(this);
      Register temp = temps.AcquireSameSizeAs(base);
      Add(temp, base, offset);
      LoadStorePair(rt, rt2, MemOperand(temp), op);
    } else if (addr.IsPostIndex()) {
      LoadStorePair(rt, rt2, MemOperand(base), op);
      Add(base, base, offset);
    } else {
      VIXL_ASSERT(addr.IsPreIndex());
      Add(base, base, offset);
      LoadStorePair(rt, rt2, MemOperand(base), op);
    }
  }
}
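
// Added illustration: the ldp/stp immediate is a scaled signed 7-bit field,
// so an X-register pair supports multiples of 8 in [-512, 504]; anything
// else goes through an add into a scratch base (x16 assumed):
//   __ Ldp(x0, x1, MemOperand(x2, 1024));
//   // emits: add x16, x2, #1024
//   //        ldp x0, x1, [x16]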


void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) {
  MacroEmissionCheckScope guard(this);

  // There are no pre- or post-index modes for prfm.
  VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset());

  // The access size is implicitly 8 bytes for all prefetch operations.
  unsigned size = kXRegSizeInBytesLog2;

  // Check if an immediate offset fits in the immediate field of the
  // appropriate instruction. If not, emit two instructions to perform
  // the operation.
  if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.GetOffset(), size) &&
      !IsImmLSUnscaled(addr.GetOffset())) {
    // Immediate offset that can't be encoded using unsigned or unscaled
    // addressing modes.
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
    Mov(temp, addr.GetOffset());
    Prefetch(op, MemOperand(addr.GetBaseRegister(), temp));
  } else {
    // Simple register offsets are encodable in one instruction.
    Prefetch(op, addr);
  }
}


void MacroAssembler::Push(const CPURegister& src0,
                          const CPURegister& src1,
                          const CPURegister& src2,
                          const CPURegister& src3) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
  VIXL_ASSERT(src0.IsValid());

  int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
  int size = src0.GetSizeInBytes();

  PrepareForPush(count, size);
  PushHelper(count, size, src0, src1, src2, src3);
}


void MacroAssembler::Pop(const CPURegister& dst0,
                         const CPURegister& dst1,
                         const CPURegister& dst2,
                         const CPURegister& dst3) {
  // It is not valid to pop into the same register more than once in one
  // instruction, not even into the zero register.
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
  VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  VIXL_ASSERT(dst0.IsValid());

  int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
  int size = dst0.GetSizeInBytes();

  PrepareForPop(count, size);
  PopHelper(count, size, dst0, dst1, dst2, dst3);
}


void MacroAssembler::PushCPURegList(CPURegList registers) {
  VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
  VIXL_ASSERT(!registers.Overlaps(*GetScratchFPRegisterList()));
  VIXL_ASSERT(allow_macro_instructions_);

  int reg_size = registers.GetRegisterSizeInBytes();
  PrepareForPush(registers.GetCount(), reg_size);

  // Bump the stack pointer and store two registers at the bottom.
  int size = registers.GetTotalSizeInBytes();
  const CPURegister& bottom_0 = registers.PopLowestIndex();
  const CPURegister& bottom_1 = registers.PopLowestIndex();
  if (bottom_0.IsValid() && bottom_1.IsValid()) {
    Stp(bottom_0, bottom_1, MemOperand(StackPointer(), -size, PreIndex));
  } else if (bottom_0.IsValid()) {
    Str(bottom_0, MemOperand(StackPointer(), -size, PreIndex));
  }

  int offset = 2 * reg_size;
  while (!registers.IsEmpty()) {
    const CPURegister& src0 = registers.PopLowestIndex();
    const CPURegister& src1 = registers.PopLowestIndex();
    if (src1.IsValid()) {
      Stp(src0, src1, MemOperand(StackPointer(), offset));
    } else {
      Str(src0, MemOperand(StackPointer(), offset));
    }
    offset += 2 * reg_size;
  }
}


void MacroAssembler::PopCPURegList(CPURegList registers) {
  VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
  VIXL_ASSERT(!registers.Overlaps(*GetScratchFPRegisterList()));
  VIXL_ASSERT(allow_macro_instructions_);

  int reg_size = registers.GetRegisterSizeInBytes();
  PrepareForPop(registers.GetCount(), reg_size);

  int size = registers.GetTotalSizeInBytes();
  const CPURegister& bottom_0 = registers.PopLowestIndex();
  const CPURegister& bottom_1 = registers.PopLowestIndex();

  int offset = 2 * reg_size;
  while (!registers.IsEmpty()) {
    const CPURegister& dst0 = registers.PopLowestIndex();
    const CPURegister& dst1 = registers.PopLowestIndex();
    if (dst1.IsValid()) {
      Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
    } else {
      Ldr(dst0, MemOperand(StackPointer(), offset));
    }
    offset += 2 * reg_size;
  }

  // Load the two registers at the bottom and drop the stack pointer.
  if (bottom_0.IsValid() && bottom_1.IsValid()) {
    Ldp(bottom_0, bottom_1, MemOperand(StackPointer(), size, PostIndex));
  } else if (bottom_0.IsValid()) {
    Ldr(bottom_0, MemOperand(StackPointer(), size, PostIndex));
  }
}


void MacroAssembler::PushMultipleTimes(int count, Register src) {
  VIXL_ASSERT(allow_macro_instructions_);
  int size = src.GetSizeInBytes();

  PrepareForPush(count, size);
  // Push up to four registers at a time if possible: when the current stack
  // pointer is sp and the register size is 32 bits, registers must be pushed
  // in blocks of four to maintain the 16-byte alignment for sp.
  while (count >= 4) {
    PushHelper(4, size, src, src, src, src);
    count -= 4;
  }
  if (count >= 2) {
    PushHelper(2, size, src, src, NoReg, NoReg);
    count -= 2;
  }
  if (count == 1) {
    PushHelper(1, size, src, NoReg, NoReg, NoReg);
    count -= 1;
  }
  VIXL_ASSERT(count == 0);
}


void MacroAssembler::PushHelper(int count,
                                int size,
                                const CPURegister& src0,
                                const CPURegister& src1,
                                const CPURegister& src2,
                                const CPURegister& src3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  // Worst case for size is 2 stp.
  ExactAssemblyScope scope(this,
                           2 * kInstructionSize,
                           ExactAssemblyScope::kMaximumSize);

  VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
  VIXL_ASSERT(size == src0.GetSizeInBytes());

  // When pushing multiple registers, the store order is chosen such that
  // Push(a, b) is equivalent to Push(a) followed by Push(b).
  switch (count) {
    case 1:
      VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
      str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
      break;
    case 2:
      VIXL_ASSERT(src2.IsNone() && src3.IsNone());
      stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
      break;
    case 3:
      VIXL_ASSERT(src3.IsNone());
      stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
      str(src0, MemOperand(StackPointer(), 2 * size));
      break;
    case 4:
      // Skip over 4 * size, then fill in the gap. This allows four W registers
      // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
      // all times.
      stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
      stp(src1, src0, MemOperand(StackPointer(), 2 * size));
      break;
    default:
      VIXL_UNREACHABLE();
  }
}
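
// Added illustration: the resulting layout for PushHelper(4, size, a, b, c,
// d), matching Push(a); Push(b); Push(c); Push(d):
//   [sp + 3 * size]: a   (highest address, pushed first)
//   [sp + 2 * size]: b
//   [sp + 1 * size]: c
//   [sp + 0 * size]: d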


void MacroAssembler::PopHelper(int count,
                               int size,
                               const CPURegister& dst0,
                               const CPURegister& dst1,
                               const CPURegister& dst2,
                               const CPURegister& dst3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  // Worst case for size is 2 ldp.
  ExactAssemblyScope scope(this,
                           2 * kInstructionSize,
                           ExactAssemblyScope::kMaximumSize);

  VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  VIXL_ASSERT(size == dst0.GetSizeInBytes());

  // When popping multiple registers, the load order is chosen such that
  // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
  switch (count) {
    case 1:
      VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
      ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
      break;
    case 2:
      VIXL_ASSERT(dst2.IsNone() && dst3.IsNone());
      ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
      break;
    case 3:
      VIXL_ASSERT(dst3.IsNone());
      ldr(dst2, MemOperand(StackPointer(), 2 * size));
      ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
      break;
    case 4:
      // Load the higher addresses first, then load the lower addresses and skip
      // the whole block in the second instruction. This allows four W registers
      // to be popped using sp, whilst maintaining 16-byte alignment for sp at
      // all times.
      ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
      ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
      break;
    default:
      VIXL_UNREACHABLE();
  }
}


void MacroAssembler::PrepareForPush(int count, int size) {
  if (sp.Is(StackPointer())) {
    // If the current stack pointer is sp, then it must be aligned to 16 bytes
    // on entry and the total size of the specified registers must also be a
    // multiple of 16 bytes.
    VIXL_ASSERT((count * size) % 16 == 0);
  } else {
    // Even if the current stack pointer is not the system stack pointer (sp),
    // the system stack pointer will still be modified in order to comply with
    // ABI rules about accessing memory below the system stack pointer.
    BumpSystemStackPointer(count * size);
  }
}


void MacroAssembler::PrepareForPop(int count, int size) {
  USE(count, size);
  if (sp.Is(StackPointer())) {
    // If the current stack pointer is sp, then it must be aligned to 16 bytes
    // on entry and the total size of the specified registers must also be a
    // multiple of 16 bytes.
    VIXL_ASSERT((count * size) % 16 == 0);
  }
}

void MacroAssembler::Poke(const Register& src, const Operand& offset) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (offset.IsImmediate()) {
    VIXL_ASSERT(offset.GetImmediate() >= 0);
  }

  Str(src, MemOperand(StackPointer(), offset));
}


void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (offset.IsImmediate()) {
    VIXL_ASSERT(offset.GetImmediate() >= 0);
  }

  Ldr(dst, MemOperand(StackPointer(), offset));
}


void MacroAssembler::Claim(const Operand& size) {
  VIXL_ASSERT(allow_macro_instructions_);

  if (size.IsZero()) {
    return;
  }

  if (size.IsImmediate()) {
    VIXL_ASSERT(size.GetImmediate() > 0);
    if (sp.Is(StackPointer())) {
      VIXL_ASSERT((size.GetImmediate() % 16) == 0);
    }
  }

  if (!sp.Is(StackPointer())) {
    BumpSystemStackPointer(size);
  }

  Sub(StackPointer(), StackPointer(), size);
}


void MacroAssembler::Drop(const Operand& size) {
  VIXL_ASSERT(allow_macro_instructions_);

  if (size.IsZero()) {
    return;
  }

  if (size.IsImmediate()) {
    VIXL_ASSERT(size.GetImmediate() > 0);
    if (sp.Is(StackPointer())) {
      VIXL_ASSERT((size.GetImmediate() % 16) == 0);
    }
  }

  Add(StackPointer(), StackPointer(), size);
}
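
// Added illustration of typical frame management with these helpers:
//   __ Claim(32);    // reserve 32 bytes below the stack pointer
//   __ Poke(x0, 0);  // spill x0 to [stack pointer]
//   __ Peek(x1, 0);  // reload it into x1
//   __ Drop(32);     // release the reservation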


void MacroAssembler::PushCalleeSavedRegisters() {
  // Ensure that the macro-assembler doesn't use any scratch registers.
  // 10 stp will be emitted.
  // TODO(all): Should we use GetCalleeSaved and SavedFP?
  ExactAssemblyScope scope(this, 10 * kInstructionSize);

  // This method must not be called unless the current stack pointer is sp.
  VIXL_ASSERT(sp.Is(StackPointer()));

  MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex);

  stp(x29, x30, tos);
  stp(x27, x28, tos);
  stp(x25, x26, tos);
  stp(x23, x24, tos);
  stp(x21, x22, tos);
  stp(x19, x20, tos);

  stp(d14, d15, tos);
  stp(d12, d13, tos);
  stp(d10, d11, tos);
  stp(d8, d9, tos);
}


void MacroAssembler::PopCalleeSavedRegisters() {
  // Ensure that the macro-assembler doesn't use any scratch registers.
  // 10 ldp will be emitted.
  // TODO(all): Should we use GetCalleeSaved and SavedFP?
  ExactAssemblyScope scope(this, 10 * kInstructionSize);

  // This method must not be called unless the current stack pointer is sp.
  VIXL_ASSERT(sp.Is(StackPointer()));

  MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);

  ldp(d8, d9, tos);
  ldp(d10, d11, tos);
  ldp(d12, d13, tos);
  ldp(d14, d15, tos);

  ldp(x19, x20, tos);
  ldp(x21, x22, tos);
  ldp(x23, x24, tos);
  ldp(x25, x26, tos);
  ldp(x27, x28, tos);
  ldp(x29, x30, tos);
}

void MacroAssembler::LoadCPURegList(CPURegList registers,
                                    const MemOperand& src) {
  LoadStoreCPURegListHelper(kLoad, registers, src);
}

void MacroAssembler::StoreCPURegList(CPURegList registers,
                                     const MemOperand& dst) {
  LoadStoreCPURegListHelper(kStore, registers, dst);
}


void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
                                               CPURegList registers,
                                               const MemOperand& mem) {
  // We do not handle pre-indexing or post-indexing.
  VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
  VIXL_ASSERT(!registers.Overlaps(tmp_list_));
  VIXL_ASSERT(!registers.Overlaps(fptmp_list_));
  VIXL_ASSERT(!registers.IncludesAliasOf(sp));

  UseScratchRegisterScope temps(this);

  MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers, mem, &temps);
  const int reg_size = registers.GetRegisterSizeInBytes();

  VIXL_ASSERT(IsPowerOf2(reg_size));

  // Since we are operating on register pairs, we would like to align on double
  // the standard size; on the other hand, we don't want to insert an extra
  // operation, which would happen if the number of registers were even. An odd
  // count needs a lone access anyway, so we issue it first to align the
  // remaining pairs. Note that the alignment of the base pointer is unknown
  // here, but we assume that it is more likely to be aligned.
  if (((loc.GetOffset() & (2 * reg_size - 1)) != 0) &&
      ((registers.GetCount() % 2) != 0)) {
    if (op == kStore) {
      Str(registers.PopLowestIndex(), loc);
    } else {
      VIXL_ASSERT(op == kLoad);
      Ldr(registers.PopLowestIndex(), loc);
    }
    loc.AddOffset(reg_size);
  }
  while (registers.GetCount() >= 2) {
    const CPURegister& dst0 = registers.PopLowestIndex();
    const CPURegister& dst1 = registers.PopLowestIndex();
    if (op == kStore) {
      Stp(dst0, dst1, loc);
    } else {
      VIXL_ASSERT(op == kLoad);
      Ldp(dst0, dst1, loc);
    }
    loc.AddOffset(2 * reg_size);
  }
  if (!registers.IsEmpty()) {
    if (op == kStore) {
      Str(registers.PopLowestIndex(), loc);
    } else {
      VIXL_ASSERT(op == kLoad);
      Ldr(registers.PopLowestIndex(), loc);
    }
  }
}

MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
    const CPURegList& registers,
    const MemOperand& mem,
    UseScratchRegisterScope* scratch_scope) {
  // If necessary, pre-compute the base address for the accesses.
  if (mem.IsRegisterOffset()) {
    Register reg_base = scratch_scope->AcquireX();
    ComputeAddress(reg_base, mem);
    return MemOperand(reg_base);

  } else if (mem.IsImmediateOffset()) {
    int reg_size = registers.GetRegisterSizeInBytes();
    int total_size = registers.GetTotalSizeInBytes();
    int64_t min_offset = mem.GetOffset();
    int64_t max_offset =
        mem.GetOffset() + std::max(0, total_size - 2 * reg_size);
    if ((registers.GetCount() >= 2) &&
        (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
         !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
      Register reg_base = scratch_scope->AcquireX();
      ComputeAddress(reg_base, mem);
      return MemOperand(reg_base);
    }
  }

  return mem;
}

void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
  VIXL_ASSERT(!sp.Is(StackPointer()));
  // TODO: Several callers rely on this not using scratch registers, so we use
  // the assembler directly here. However, this means that large immediate
  // values of 'space' cannot be handled.
  ExactAssemblyScope scope(this, kInstructionSize);
  sub(sp, StackPointer(), space);
}


// TODO(all): Fix printf for NEON registers, and resolve whether we should be
// using FPRegister or VRegister here.

// This is the main Printf implementation. All callee-saved registers are
// preserved, but NZCV and the caller-saved registers may be clobbered.
void MacroAssembler::PrintfNoPreserve(const char* format,
                                      const CPURegister& arg0,
                                      const CPURegister& arg1,
                                      const CPURegister& arg2,
                                      const CPURegister& arg3) {
  // We cannot handle a caller-saved stack pointer. It doesn't make much sense
  // in most cases anyway, so this restriction shouldn't be too serious.
  VIXL_ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));

  // The provided arguments, and their proper PCS registers.
  CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3};
  CPURegister pcs[kPrintfMaxArgCount];

  int arg_count = kPrintfMaxArgCount;

  // The PCS varargs registers for printf. Note that x0 is used for the printf
  // format string.
  static const CPURegList kPCSVarargs =
      CPURegList(CPURegister::kRegister, kXRegSize, 1, arg_count);
  static const CPURegList kPCSVarargsFP =
      CPURegList(CPURegister::kVRegister, kDRegSize, 0, arg_count - 1);

  // We can use caller-saved registers as scratch values, except for the
  // arguments and the PCS registers where they might need to go.
  UseScratchRegisterScope temps(this);
  temps.Include(kCallerSaved);
  temps.Include(kCallerSavedV);
  temps.Exclude(kPCSVarargs);
  temps.Exclude(kPCSVarargsFP);
  temps.Exclude(arg0, arg1, arg2, arg3);

  // Copies of the arg lists that we can iterate through.
  CPURegList pcs_varargs = kPCSVarargs;
  CPURegList pcs_varargs_fp = kPCSVarargsFP;

  // Place the arguments. There are lots of clever tricks and optimizations we
  // could use here, but Printf is a debug tool so instead we just try to keep
  // it simple: move each input that isn't already in the right place to a
  // scratch register, then move everything back.
  for (unsigned i = 0; i < kPrintfMaxArgCount; i++) {
    // Work out the proper PCS register for this argument.
    if (args[i].IsRegister()) {
      pcs[i] = pcs_varargs.PopLowestIndex().X();
      // We might only need a W register here. We need to know the size of the
      // argument so we can properly encode it for the simulator call.
      if (args[i].Is32Bits()) pcs[i] = pcs[i].W();
    } else if (args[i].IsVRegister()) {
      // In C, floats are always cast to doubles for varargs calls.
      pcs[i] = pcs_varargs_fp.PopLowestIndex().D();
    } else {
      VIXL_ASSERT(args[i].IsNone());
      arg_count = i;
      break;
    }

    // If the argument is already in the right place, leave it where it is.
    if (args[i].Aliases(pcs[i])) continue;

    // Otherwise, if the argument is in a PCS argument register, allocate an
    // appropriate scratch register and then move it out of the way.
    if (kPCSVarargs.IncludesAliasOf(args[i]) ||
        kPCSVarargsFP.IncludesAliasOf(args[i])) {
      if (args[i].IsRegister()) {
        Register old_arg = Register(args[i]);
        Register new_arg = temps.AcquireSameSizeAs(old_arg);
        Mov(new_arg, old_arg);
        args[i] = new_arg;
      } else {
        FPRegister old_arg = FPRegister(args[i]);
        FPRegister new_arg = temps.AcquireSameSizeAs(old_arg);
        Fmov(new_arg, old_arg);
        args[i] = new_arg;
      }
    }
  }

  // Do a second pass to move values into their final positions and perform any
  // conversions that may be required.
  for (int i = 0; i < arg_count; i++) {
    VIXL_ASSERT(pcs[i].GetType() == args[i].GetType());
    if (pcs[i].IsRegister()) {
      Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg);
    } else {
      VIXL_ASSERT(pcs[i].IsVRegister());
      if (pcs[i].GetSizeInBits() == args[i].GetSizeInBits()) {
        Fmov(FPRegister(pcs[i]), FPRegister(args[i]));
      } else {
        Fcvt(FPRegister(pcs[i]), FPRegister(args[i]));
      }
    }
  }

  // Load the format string into x0, as per the procedure-call standard.
  //
  // To make the code as portable as possible, the format string is encoded
  // directly in the instruction stream. It might be cleaner to encode it in a
  // literal pool, but since Printf is usually used for debugging, it is
  // beneficial for it to be minimally dependent on other features.
  temps.Exclude(x0);
  Label format_address;
  Adr(x0, &format_address);

  // Emit the format string directly in the instruction stream.
  {
    BlockPoolsScope scope(this);
    // Data emitted:
    //   branch
    //   strlen(format) + 1 (includes null termination)
    //   padding to next instruction
    //   unreachable
    EmissionCheckScope guard(this,
                             AlignUp(strlen(format) + 1, kInstructionSize) +
                                 2 * kInstructionSize);
    Label after_data;
    B(&after_data);
    Bind(&format_address);
    EmitString(format);
    Unreachable();
    Bind(&after_data);
  }

  // We don't pass any arguments on the stack, but we still need to align the C
  // stack pointer to a 16-byte boundary for PCS compliance.
  if (!sp.Is(StackPointer())) {
    Bic(sp, StackPointer(), 0xf);
  }

  // Actually call printf. This part needs special handling for the simulator,
  // since the system printf function will use a different instruction set and
  // the procedure-call standard will not be compatible.
  if (generate_simulator_code_) {
    ExactAssemblyScope scope(this, kPrintfLength);
    hlt(kPrintfOpcode);
    dc32(arg_count);  // kPrintfArgCountOffset

    // Determine the argument pattern.
    uint32_t arg_pattern_list = 0;
    for (int i = 0; i < arg_count; i++) {
      uint32_t arg_pattern;
      if (pcs[i].IsRegister()) {
        arg_pattern = pcs[i].Is32Bits() ? kPrintfArgW : kPrintfArgX;
      } else {
        VIXL_ASSERT(pcs[i].Is64Bits());
        arg_pattern = kPrintfArgD;
      }
      VIXL_ASSERT(arg_pattern < (1 << kPrintfArgPatternBits));
      arg_pattern_list |= (arg_pattern << (kPrintfArgPatternBits * i));
    }
    dc32(arg_pattern_list);  // kPrintfArgPatternListOffset
  } else {
    Register tmp = temps.AcquireX();
    Mov(tmp, reinterpret_cast<uintptr_t>(printf));
    Blr(tmp);
  }
}


void MacroAssembler::Printf(const char* format,
                            CPURegister arg0,
                            CPURegister arg1,
                            CPURegister arg2,
                            CPURegister arg3) {
  // We can only print sp if it is the current stack pointer.
  if (!sp.Is(StackPointer())) {
    VIXL_ASSERT(!sp.Aliases(arg0));
    VIXL_ASSERT(!sp.Aliases(arg1));
    VIXL_ASSERT(!sp.Aliases(arg2));
    VIXL_ASSERT(!sp.Aliases(arg3));
  }

  // Make sure that the macro assembler doesn't try to use any of our arguments
  // as scratch registers.
  UseScratchRegisterScope exclude_all(this);
  exclude_all.ExcludeAll();

  // Preserve all caller-saved registers as well as NZCV.
  // If sp is the stack pointer, PushCPURegList asserts that the size of each
  // list is a multiple of 16 bytes.
  PushCPURegList(kCallerSaved);
  PushCPURegList(kCallerSavedV);

  {
    UseScratchRegisterScope temps(this);
    // We can use caller-saved registers as scratch values (except for argN).
    temps.Include(kCallerSaved);
    temps.Include(kCallerSavedV);
    temps.Exclude(arg0, arg1, arg2, arg3);

    // If any of the arguments are the current stack pointer, allocate a new
    // register for them, and adjust the value to compensate for pushing the
    // caller-saved registers.
    bool arg0_sp = StackPointer().Aliases(arg0);
    bool arg1_sp = StackPointer().Aliases(arg1);
    bool arg2_sp = StackPointer().Aliases(arg2);
    bool arg3_sp = StackPointer().Aliases(arg3);
    if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) {
      // Allocate a register to hold the original stack pointer value, to pass
      // to PrintfNoPreserve as an argument.
      Register arg_sp = temps.AcquireX();
      Add(arg_sp,
          StackPointer(),
          kCallerSaved.GetTotalSizeInBytes() +
              kCallerSavedV.GetTotalSizeInBytes());
      if (arg0_sp) arg0 = Register(arg_sp.GetCode(), arg0.GetSizeInBits());
      if (arg1_sp) arg1 = Register(arg_sp.GetCode(), arg1.GetSizeInBits());
      if (arg2_sp) arg2 = Register(arg_sp.GetCode(), arg2.GetSizeInBits());
      if (arg3_sp) arg3 = Register(arg_sp.GetCode(), arg3.GetSizeInBits());
    }

    // Preserve NZCV.
    Register tmp = temps.AcquireX();
    Mrs(tmp, NZCV);
    Push(tmp, xzr);
    temps.Release(tmp);

    PrintfNoPreserve(format, arg0, arg1, arg2, arg3);

    // Restore NZCV.
    tmp = temps.AcquireX();
    Pop(xzr, tmp);
    Msr(NZCV, tmp);
    temps.Release(tmp);
  }

  PopCPURegList(kCallerSavedV);
  PopCPURegList(kCallerSaved);
}
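
// Added usage sketch (assumes the <inttypes.h> format macros):
//   __ Mov(x0, 42);
//   __ Printf("x0 = %" PRId64 "\n", x0);
// Each placeholder should match its argument: 64-bit integer formats for X
// registers, 32-bit formats for W registers, and "%f"-style formats for D
// registers.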

void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
  VIXL_ASSERT(allow_macro_instructions_);

  if (generate_simulator_code_) {
    // The arguments to the trace pseudo instruction need to be contiguous in
    // memory, so make sure we don't try to emit a literal pool.
    ExactAssemblyScope scope(this, kTraceLength);

    Label start;
    bind(&start);

    // Refer to simulator-aarch64.h for a description of the marker and its
    // arguments.
    hlt(kTraceOpcode);

    VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
    dc32(parameters);

    VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
    dc32(command);
  } else {
    // Emit nothing on real hardware.
    USE(parameters, command);
  }
}


void MacroAssembler::Log(TraceParameters parameters) {
  VIXL_ASSERT(allow_macro_instructions_);

  if (generate_simulator_code_) {
    // The arguments to the log pseudo instruction need to be contiguous in
    // memory, so make sure we don't try to emit a literal pool.
    ExactAssemblyScope scope(this, kLogLength);

    Label start;
    bind(&start);

    // Refer to simulator-aarch64.h for a description of the marker and its
    // arguments.
    hlt(kLogOpcode);

    VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
    dc32(parameters);
  } else {
    // Emit nothing on real hardware.
    USE(parameters);
  }
}


void MacroAssembler::EnableInstrumentation() {
  VIXL_ASSERT(!isprint(InstrumentStateEnable));
  ExactAssemblyScope scope(this, kInstructionSize);
  movn(xzr, InstrumentStateEnable);
}


void MacroAssembler::DisableInstrumentation() {
  VIXL_ASSERT(!isprint(InstrumentStateDisable));
  ExactAssemblyScope scope(this, kInstructionSize);
  movn(xzr, InstrumentStateDisable);
}


void MacroAssembler::AnnotateInstrumentation(const char* marker_name) {
  VIXL_ASSERT(strlen(marker_name) == 2);

  // We allow only printable characters in the marker names. Unprintable
  // characters are reserved for controlling features of the instrumentation.
  VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));

  ExactAssemblyScope scope(this, kInstructionSize);
  movn(xzr, (marker_name[1] << 8) | marker_name[0]);
}


void UseScratchRegisterScope::Open(MacroAssembler* masm) {
  VIXL_ASSERT(masm_ == NULL);
  VIXL_ASSERT(masm != NULL);
  masm_ = masm;

  CPURegList* available = masm->GetScratchRegisterList();
  CPURegList* available_fp = masm->GetScratchFPRegisterList();
  old_available_ = available->GetList();
  old_availablefp_ = available_fp->GetList();
  VIXL_ASSERT(available->GetType() == CPURegister::kRegister);
  VIXL_ASSERT(available_fp->GetType() == CPURegister::kVRegister);

  parent_ = masm->GetCurrentScratchRegisterScope();
  masm->SetCurrentScratchRegisterScope(this);
}


void UseScratchRegisterScope::Close() {
  if (masm_ != NULL) {
    // Ensure that scopes nest perfectly, and do not outlive their parents.
    // This is a run-time check because the order of destruction of objects in
    // the _same_ scope is implementation-defined, and is likely to change in
    // optimised builds.
    VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
    masm_->SetCurrentScratchRegisterScope(parent_);

    masm_->GetScratchRegisterList()->SetList(old_available_);
    masm_->GetScratchFPRegisterList()->SetList(old_availablefp_);

    masm_ = NULL;
  }
}
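
// Added usage sketch: scopes nest, and registers acquired inside a scope are
// returned to the macro assembler's scratch list when it closes:
//   {
//     UseScratchRegisterScope temps(&masm);
//     Register temp = temps.AcquireX();  // x16 (ip0) by default.
//     __ Mov(temp, 0xdeadbeef);
//   }  // 'temp' becomes available as a scratch register again here.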


bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
  return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) ||
         masm_->GetScratchFPRegisterList()->IncludesAliasOf(reg);
}


Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) {
  int code = AcquireNextAvailable(masm_->GetScratchRegisterList()).GetCode();
  return Register(code, size_in_bits);
}


FPRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) {
  int code = AcquireNextAvailable(masm_->GetScratchFPRegisterList()).GetCode();
  return FPRegister(code, size_in_bits);
}


void UseScratchRegisterScope::Release(const CPURegister& reg) {
  VIXL_ASSERT(masm_ != NULL);
  if (reg.IsRegister()) {
    ReleaseByCode(masm_->GetScratchRegisterList(), reg.GetCode());
  } else if (reg.IsVRegister()) {
    ReleaseByCode(masm_->GetScratchFPRegisterList(), reg.GetCode());
  } else {
    VIXL_ASSERT(reg.IsNone());
  }
}


void UseScratchRegisterScope::Include(const CPURegList& list) {
  VIXL_ASSERT(masm_ != NULL);
  if (list.GetType() == CPURegister::kRegister) {
    // Make sure that neither sp nor xzr are included in the list.
    IncludeByRegList(masm_->GetScratchRegisterList(),
                     list.GetList() & ~(xzr.GetBit() | sp.GetBit()));
  } else {
    VIXL_ASSERT(list.GetType() == CPURegister::kVRegister);
    IncludeByRegList(masm_->GetScratchFPRegisterList(), list.GetList());
  }
}


void UseScratchRegisterScope::Include(const Register& reg1,
                                      const Register& reg2,
                                      const Register& reg3,
                                      const Register& reg4) {
  VIXL_ASSERT(masm_ != NULL);
  RegList include =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  // Make sure that neither sp nor xzr are included in the list.
  include &= ~(xzr.GetBit() | sp.GetBit());

  IncludeByRegList(masm_->GetScratchRegisterList(), include);
}


void UseScratchRegisterScope::Include(const FPRegister& reg1,
                                      const FPRegister& reg2,
                                      const FPRegister& reg3,
                                      const FPRegister& reg4) {
  RegList include =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  IncludeByRegList(masm_->GetScratchFPRegisterList(), include);
}


void UseScratchRegisterScope::Exclude(const CPURegList& list) {
  if (list.GetType() == CPURegister::kRegister) {
    ExcludeByRegList(masm_->GetScratchRegisterList(), list.GetList());
  } else {
    VIXL_ASSERT(list.GetType() == CPURegister::kVRegister);
    ExcludeByRegList(masm_->GetScratchFPRegisterList(), list.GetList());
  }
}


void UseScratchRegisterScope::Exclude(const Register& reg1,
                                      const Register& reg2,
                                      const Register& reg3,
                                      const Register& reg4) {
  RegList exclude =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
}


void UseScratchRegisterScope::Exclude(const FPRegister& reg1,
                                      const FPRegister& reg2,
                                      const FPRegister& reg3,
                                      const FPRegister& reg4) {
  RegList excludefp =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  ExcludeByRegList(masm_->GetScratchFPRegisterList(), excludefp);
}


void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
                                      const CPURegister& reg2,
                                      const CPURegister& reg3,
                                      const CPURegister& reg4) {
  RegList exclude = 0;
  RegList excludefp = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4};

  for (unsigned i = 0; i < (sizeof(regs) / sizeof(regs[0])); i++) {
    if (regs[i].IsRegister()) {
      exclude |= regs[i].GetBit();
    } else if (regs[i].IsFPRegister()) {
      excludefp |= regs[i].GetBit();
    } else {
      VIXL_ASSERT(regs[i].IsNone());
    }
  }

  ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
  ExcludeByRegList(masm_->GetScratchFPRegisterList(), excludefp);
}


void UseScratchRegisterScope::ExcludeAll() {
  ExcludeByRegList(masm_->GetScratchRegisterList(),
                   masm_->GetScratchRegisterList()->GetList());
  ExcludeByRegList(masm_->GetScratchFPRegisterList(),
                   masm_->GetScratchFPRegisterList()->GetList());
}


CPURegister UseScratchRegisterScope::AcquireNextAvailable(
    CPURegList* available) {
  VIXL_CHECK(!available->IsEmpty());
  CPURegister result = available->PopLowestIndex();
  VIXL_ASSERT(!AreAliased(result, xzr, sp));
  return result;
}


void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) {
  ReleaseByRegList(available, static_cast<RegList>(1) << code);
}


void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available,
                                               RegList regs) {
  available->SetList(available->GetList() | regs);
}


void UseScratchRegisterScope::IncludeByRegList(CPURegList* available,
                                               RegList regs) {
  available->SetList(available->GetList() | regs);
}


void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
                                               RegList exclude) {
  available->SetList(available->GetList() & ~exclude);
}

}  // namespace aarch64
}  // namespace vixl