// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include "aarch32/macro-assembler-aarch32.h"

#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)

#define CONTEXT_SCOPE \
  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))

namespace vixl {
namespace aarch32 {

// We use a subclass to access the protected `ExactAssemblyScope` constructor
// which gives us control over the pools, and we make that constructor private
// to limit its use to code paths that emit the pools.
class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
 private:
  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm,
                                      size_t size,
                                      SizePolicy size_policy = kExactSize)
      : ExactAssemblyScope(masm,
                           size,
                           size_policy,
                           ExactAssemblyScope::kIgnorePools) {}

  friend class MacroAssembler;
  friend class VeneerPoolManager;
};


void UseScratchRegisterScope::Open(MacroAssembler* masm) {
  VIXL_ASSERT(masm_ == NULL);
  VIXL_ASSERT(masm != NULL);
  masm_ = masm;

  old_available_ = masm_->GetScratchRegisterList()->GetList();
  old_available_vfp_ = masm_->GetScratchVRegisterList()->GetList();

  parent_ = masm->GetCurrentScratchRegisterScope();
  masm->SetCurrentScratchRegisterScope(this);
}


void UseScratchRegisterScope::Close() {
  if (masm_ != NULL) {
    // Ensure that scopes nest perfectly, and do not outlive their parents.
    // This is a run-time check because the order of destruction of objects in
    // the _same_ scope is implementation-defined, and is likely to change in
    // optimised builds.
    VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
    masm_->SetCurrentScratchRegisterScope(parent_);

    masm_->GetScratchRegisterList()->SetList(old_available_);
    masm_->GetScratchVRegisterList()->SetList(old_available_vfp_);

    masm_ = NULL;
  }
}
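
// A minimal usage sketch of this scope (`masm` is assumed to be a pointer to
// a caller-owned MacroAssembler):
//
//   {
//     UseScratchRegisterScope temps(masm);
//     Register scratch = temps.Acquire();
//     // ... use `scratch` freely ...
//   }  // `scratch` is returned to the scratch pool here.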


bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
  VIXL_ASSERT(masm_ != NULL);
  VIXL_ASSERT(reg.IsValid());
  return masm_->GetScratchRegisterList()->Includes(reg);
}


bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
  VIXL_ASSERT(masm_ != NULL);
  VIXL_ASSERT(reg.IsValid());
  return masm_->GetScratchVRegisterList()->IncludesAllOf(reg);
}


Register UseScratchRegisterScope::Acquire() {
  VIXL_ASSERT(masm_ != NULL);
  Register reg = masm_->GetScratchRegisterList()->GetFirstAvailableRegister();
  VIXL_CHECK(reg.IsValid());
  masm_->GetScratchRegisterList()->Remove(reg);
  return reg;
}


VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
  switch (size_in_bits) {
    case kSRegSizeInBits:
      return AcquireS();
    case kDRegSizeInBits:
      return AcquireD();
    case kQRegSizeInBits:
      return AcquireQ();
    default:
      VIXL_UNREACHABLE();
      return NoVReg;
  }
}


QRegister UseScratchRegisterScope::AcquireQ() {
  VIXL_ASSERT(masm_ != NULL);
  QRegister reg =
      masm_->GetScratchVRegisterList()->GetFirstAvailableQRegister();
  VIXL_CHECK(reg.IsValid());
  masm_->GetScratchVRegisterList()->Remove(reg);
  return reg;
}


DRegister UseScratchRegisterScope::AcquireD() {
  VIXL_ASSERT(masm_ != NULL);
  DRegister reg =
      masm_->GetScratchVRegisterList()->GetFirstAvailableDRegister();
  VIXL_CHECK(reg.IsValid());
  masm_->GetScratchVRegisterList()->Remove(reg);
  return reg;
}


SRegister UseScratchRegisterScope::AcquireS() {
  VIXL_ASSERT(masm_ != NULL);
  SRegister reg =
      masm_->GetScratchVRegisterList()->GetFirstAvailableSRegister();
  VIXL_CHECK(reg.IsValid());
  masm_->GetScratchVRegisterList()->Remove(reg);
  return reg;
}


void UseScratchRegisterScope::Release(const Register& reg) {
  VIXL_ASSERT(masm_ != NULL);
  VIXL_ASSERT(reg.IsValid());
  VIXL_ASSERT(!masm_->GetScratchRegisterList()->Includes(reg));
  masm_->GetScratchRegisterList()->Combine(reg);
}


void UseScratchRegisterScope::Release(const VRegister& reg) {
  VIXL_ASSERT(masm_ != NULL);
  VIXL_ASSERT(reg.IsValid());
  VIXL_ASSERT(!masm_->GetScratchVRegisterList()->IncludesAliasOf(reg));
  masm_->GetScratchVRegisterList()->Combine(reg);
}


void UseScratchRegisterScope::Include(const RegisterList& list) {
  VIXL_ASSERT(masm_ != NULL);
  RegisterList excluded_registers(sp, lr, pc);
  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
  RegisterList* available = masm_->GetScratchRegisterList();
  available->SetList(available->GetList() | mask);
}


void UseScratchRegisterScope::Include(const VRegisterList& list) {
  VIXL_ASSERT(masm_ != NULL);
  VRegisterList* available = masm_->GetScratchVRegisterList();
  available->SetList(available->GetList() | list.GetList());
}


void UseScratchRegisterScope::Exclude(const RegisterList& list) {
  VIXL_ASSERT(masm_ != NULL);
  RegisterList* available = masm_->GetScratchRegisterList();
  available->SetList(available->GetList() & ~list.GetList());
}


void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
  VIXL_ASSERT(masm_ != NULL);
  VRegisterList* available = masm_->GetScratchVRegisterList();
  available->SetList(available->GetList() & ~list.GetList());
}


void UseScratchRegisterScope::Exclude(const Operand& operand) {
  if (operand.IsImmediateShiftedRegister()) {
    Exclude(operand.GetBaseRegister());
  } else if (operand.IsRegisterShiftedRegister()) {
    Exclude(operand.GetBaseRegister(), operand.GetShiftRegister());
  } else {
    VIXL_ASSERT(operand.IsImmediate());
  }
}


void UseScratchRegisterScope::ExcludeAll() {
  VIXL_ASSERT(masm_ != NULL);
  masm_->GetScratchRegisterList()->SetList(0);
  masm_->GetScratchVRegisterList()->SetList(0);
}


void VeneerPoolManager::AddLabel(Label* label) {
  if (last_label_reference_offset_ != 0) {
    // If the pool grows faster than the instruction stream, we must adjust
    // the checkpoint to compensate. Veneer pool entries take 32 bits, so this
    // can only occur when two consecutive 16-bit instructions add veneer
    // pool entries.
    // This is typically the case for cbz and cbnz (other forward branches
    // have a 32-bit variant which is always used).
    if (last_label_reference_offset_ + 2 * k16BitT32InstructionSizeInBytes ==
        static_cast<uint32_t>(masm_->GetCursorOffset())) {
      // We found two 16-bit forward branches generated one after the other.
      // That means that the pool will grow by one 32-bit branch while the
      // cursor offset moves forward by only one 16-bit instruction.
      // Update the near checkpoint margin to absorb the difference.
      near_checkpoint_margin_ +=
          k32BitT32InstructionSizeInBytes - k16BitT32InstructionSizeInBytes;
    }
  }
  Label::ForwardReference& back = label->GetBackForwardRef();
  VIXL_ASSERT(back.GetMaxForwardDistance() >= kCbzCbnzRange);
  if (!label->IsInVeneerPool()) {
    if (back.GetMaxForwardDistance() <= kNearLabelRange) {
      near_labels_.push_back(label);
      label->SetVeneerPoolManager(this, true);
    } else {
      far_labels_.push_back(label);
      label->SetVeneerPoolManager(this, false);
    }
  } else if (back.GetMaxForwardDistance() <= kNearLabelRange) {
    if (!label->IsNear()) {
      far_labels_.remove(label);
      near_labels_.push_back(label);
      label->SetVeneerPoolManager(this, true);
    }
  }

  back.SetIsBranch();
  last_label_reference_offset_ = back.GetLocation();
  label->UpdateCheckpoint();
  Label::Offset tmp = label->GetCheckpoint();
  if (label->IsNear()) {
    if (near_checkpoint_ > tmp) near_checkpoint_ = tmp;
    if (max_near_checkpoint_ >= tmp) {
      // This checkpoint is before some already in the near list. That means
      // that the veneer (if needed) will be emitted before some of the veneers
      // already in the list. We adjust the margin with the size of a veneer
      // branch.
      near_checkpoint_margin_ += k32BitT32InstructionSizeInBytes;
    } else {
      max_near_checkpoint_ = tmp;
    }
  } else {
    if (far_checkpoint_ > tmp) far_checkpoint_ = tmp;
  }
  // Always recompute the global checkpoint, as adding veneers shortens the
  // literals' checkpoint.
  masm_->ComputeCheckpoint();
}


void VeneerPoolManager::RemoveLabel(Label* label) {
  label->ClearVeneerPoolManager();
  std::list<Label*>& list = label->IsNear() ? near_labels_ : far_labels_;
  Label::Offset* checkpoint_reference =
      label->IsNear() ? &near_checkpoint_ : &far_checkpoint_;
  if (label->GetCheckpoint() == *checkpoint_reference) {
    // We have to compute the checkpoint again.
    *checkpoint_reference = Label::kMaxOffset;
    for (std::list<Label*>::iterator it = list.begin(); it != list.end();) {
      if (*it == label) {
        it = list.erase(it);
      } else {
        *checkpoint_reference =
            std::min(*checkpoint_reference, (*it)->GetCheckpoint());
        ++it;
      }
    }
    masm_->ComputeCheckpoint();
  } else {
    // We only have to remove the label from the list.
    list.remove(label);
  }
}


void VeneerPoolManager::EmitLabel(Label* label, Label::Offset emitted_target) {
  VIXL_ASSERT(!IsBlocked());
  // Define the veneer.
  Label veneer;
  masm_->Bind(&veneer);
  Label::Offset label_checkpoint = Label::kMaxOffset;
  // Check all uses of this label.
  for (Label::ForwardRefList::iterator ref = label->GetFirstForwardRef();
       ref != label->GetEndForwardRef();) {
    if (ref->IsBranch()) {
      if (ref->GetCheckpoint() <= emitted_target) {
        // Use the veneer.
        masm_->EncodeLabelFor(*ref, &veneer);
        ref = label->Erase(ref);
      } else {
        // Don't use the veneer => update checkpoint.
        label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
        ++ref;
      }
    } else {
      ++ref;
    }
  }
  label->SetCheckpoint(label_checkpoint);
  if (label->IsNear()) {
    near_checkpoint_ = std::min(near_checkpoint_, label_checkpoint);
  } else {
    far_checkpoint_ = std::min(far_checkpoint_, label_checkpoint);
  }
  // Generate the veneer.
  ExactAssemblyScopeWithoutPoolsCheck guard(masm_,
                                            kMaxInstructionSizeInBytes,
                                            ExactAssemblyScope::kMaximumSize);
  masm_->b(label);
  masm_->AddBranchLabel(label);
}


void VeneerPoolManager::Emit(Label::Offset target) {
  VIXL_ASSERT(!IsBlocked());
  // Sort the labels by their checkpoints to avoid a veneer going out of
  // range.
  near_labels_.sort(Label::CompareLabels);
  far_labels_.sort(Label::CompareLabels);
  // To avoid emitting pools too often, also generate the veneers which will
  // become necessary soon.
  target += static_cast<int>(GetMaxSize()) + near_checkpoint_margin_;
  static const size_t kVeneerEmissionMargin = 1 * KBytes;
  // To avoid too many veneers, reuse the generated veneers for other
  // references which are not too far away.
  static const size_t kVeneerEmittedMargin = 2 * KBytes;
  Label::Offset emitted_target = target + kVeneerEmittedMargin;
  target += kVeneerEmissionMargin;
  // Reset the checkpoints. They will be computed again in the loop.
  near_checkpoint_ = Label::kMaxOffset;
  far_checkpoint_ = Label::kMaxOffset;
  max_near_checkpoint_ = 0;
  near_checkpoint_margin_ = 0;
  for (std::list<Label*>::iterator it = near_labels_.begin();
       it != near_labels_.end();) {
    Label* label = *it;
    // Move the label from the near list to the far list, as the veneer we are
    // about to emit for it is an unconditional branch with far range.
    // The label is pushed at the end of the far list, which therefore remains
    // sorted, as the unconditional branch has the biggest range. It wouldn't
    // be a problem anyway if the items at the end of the list were not
    // sorted, as they won't be used by this generation (their range will be
    // greater than kVeneerEmittedMargin).
    it = near_labels_.erase(it);
    far_labels_.push_back(label);
    label->SetVeneerPoolManager(this, false);
    EmitLabel(label, emitted_target);
  }
  for (std::list<Label*>::iterator it = far_labels_.begin();
       it != far_labels_.end();) {
    // The labels are sorted. As soon as a veneer is not needed, we can stop.
    if ((*it)->GetCheckpoint() > target) {
      far_checkpoint_ = std::min(far_checkpoint_, (*it)->GetCheckpoint());
      break;
    }
    // Even if this label has no more uses, we can keep it in the list, as the
    // next "B" would add it back.
    EmitLabel(*it, emitted_target);
    ++it;
  }
#ifdef VIXL_DEBUG
  for (std::list<Label*>::iterator it = near_labels_.begin();
       it != near_labels_.end();
       ++it) {
    VIXL_ASSERT((*it)->GetCheckpoint() >= near_checkpoint_);
  }
  for (std::list<Label*>::iterator it = far_labels_.begin();
       it != far_labels_.end();
       ++it) {
    VIXL_ASSERT((*it)->GetCheckpoint() >= far_checkpoint_);
  }
#endif
  masm_->ComputeCheckpoint();
}


void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
  EmitOption option = kBranchRequired;
  Label after_pools;
  Label::Offset literal_target = GetTargetForLiteralEmission();
  VIXL_ASSERT(literal_target >= 0);
  bool generate_veneers = target > veneer_pool_manager_.GetCheckpoint();
  if (target > literal_target) {
    // We will generate the literal pool. Generate all the veneers which
    // would become out of range.
    size_t literal_pool_size =
        literal_pool_manager_.GetLiteralPoolSize() + kMaxInstructionSizeInBytes;
    VIXL_ASSERT(IsInt32(literal_pool_size));
    Label::Offset veneers_target =
        AlignUp(target + static_cast<Label::Offset>(literal_pool_size), 4);
    VIXL_ASSERT(veneers_target >= 0);
    if (veneers_target > veneer_pool_manager_.GetCheckpoint()) {
      generate_veneers = true;
    }
  }
  if (!IsVeneerPoolBlocked() && generate_veneers) {
    {
      ExactAssemblyScopeWithoutPoolsCheck
          guard(this,
                kMaxInstructionSizeInBytes,
                ExactAssemblyScope::kMaximumSize);
      b(&after_pools);
    }
    veneer_pool_manager_.Emit(target);
    option = kNoBranchRequired;
  }
  // Check if the macro-assembler's internal literal pool should be emitted
  // to avoid any overflow. If we already generated the veneers, we can
  // emit the pool (the branch is already done).
  if (!IsLiteralPoolBlocked() &&
      ((target > literal_target) || (option == kNoBranchRequired))) {
    EmitLiteralPool(option);
  }
  BindHelper(&after_pools);
  if (GetBuffer()->IsManaged()) {
    bool grow_requested;
    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
    if (grow_requested) ComputeCheckpoint();
  }
}


void MacroAssembler::ComputeCheckpoint() {
  checkpoint_ = AlignDown(std::min(veneer_pool_manager_.GetCheckpoint(),
                                   GetTargetForLiteralEmission()),
                          4);
  size_t buffer_size = GetBuffer()->GetCapacity();
  VIXL_ASSERT(IsInt32(buffer_size));
  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
}


void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
                                     EmitOption option) {
  VIXL_ASSERT(!IsLiteralPoolBlocked());
  if (literal_pool->GetSize() > 0) {
#ifdef VIXL_DEBUG
    for (LiteralPool::RawLiteralListIterator literal_it =
             literal_pool->GetFirst();
         literal_it != literal_pool->GetEnd();
         literal_it++) {
      RawLiteral* literal = *literal_it;
      VIXL_ASSERT(GetCursorOffset() < literal->GetCheckpoint());
    }
#endif
    Label after_literal;
    if (option == kBranchRequired) {
      GetBuffer()->EnsureSpaceFor(kMaxInstructionSizeInBytes);
      VIXL_ASSERT(!AllowAssembler());
      {
        ExactAssemblyScopeWithoutPoolsCheck
            guard(this,
                  kMaxInstructionSizeInBytes,
                  ExactAssemblyScope::kMaximumSize);
        b(&after_literal);
      }
    }
    GetBuffer()->Align();
    GetBuffer()->EnsureSpaceFor(literal_pool->GetSize());
    for (LiteralPool::RawLiteralListIterator it = literal_pool->GetFirst();
         it != literal_pool->GetEnd();
         it++) {
      PlaceHelper(*it);
      GetBuffer()->Align();
    }
    if (option == kBranchRequired) BindHelper(&after_literal);
    literal_pool->Clear();
  }
}


void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
                                                Register tmp,
                                                uint32_t imm) {
  if (IsUintN(16, imm)) {
    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
    mov(cond, tmp, imm & 0xffff);
    return;
  }
  if (IsUsingT32()) {
    if (ImmediateT32::IsImmediateT32(~imm)) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      mvn(cond, tmp, ~imm);
      return;
    }
  } else {
    if (ImmediateA32::IsImmediateA32(~imm)) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      mvn(cond, tmp, ~imm);
      return;
    }
  }
  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
  mov(cond, tmp, imm & 0xffff);
  movt(cond, tmp, imm >> 16);
}
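
// For example (a sketch), requesting the out-of-range immediate 0x12345678
// falls through to the final case above and expands to:
//   mov  tmp, #0x5678   ; imm & 0xffff
//   movt tmp, #0x1234   ; imm >> 16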


void MacroAssembler::PadToMinimumBranchRange(Label* label) {
  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
    uint32_t location = last_reference->GetLocation();
    if (location + k16BitT32InstructionSizeInBytes ==
        static_cast<uint32_t>(GetCursorOffset())) {
      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
        VIXL_ASSERT(!InITBlock());
        // A cbz or cbnz cannot branch to the instruction which immediately
        // follows it. If the label is about to be bound right after the cbz
        // or cbnz, we insert a nop to avoid that.
        EmitT32_16(k16BitT32NopOpcode);
      }
    }
  }
}
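
// For example (a sketch; `masm` is an assumed MacroAssembler pointer), the
// padding above makes the following sequence encodable by separating the cbz
// from its target:
//   masm->Cbz(r0, &label);
//   masm->Bind(&label);  // A nop is emitted before the label is bound.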


MemOperand MacroAssembler::MemOperandComputationHelper(
    Condition cond,
    Register scratch,
    Register base,
    uint32_t offset,
    uint32_t extra_offset_mask) {
  VIXL_ASSERT(!AliasesAvailableScratchRegister(scratch));
  VIXL_ASSERT(!AliasesAvailableScratchRegister(base));
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(OutsideITBlock());

  // Check for the simple pass-through case.
  if ((offset & extra_offset_mask) == offset) return MemOperand(base, offset);

  MacroEmissionCheckScope guard(this);
  ITScope it_scope(this, &cond);

  uint32_t load_store_offset = offset & extra_offset_mask;
  uint32_t add_offset = offset & ~extra_offset_mask;
  if ((add_offset != 0) &&
      (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
    load_store_offset = 0;
    add_offset = offset;
  }
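
  // For example (a sketch, assuming extra_offset_mask == 0xfff as for a T32
  // LDR with an `Offset` addressing mode), offset == 0x12345 is split into
  // load_store_offset == 0x345 and add_offset == 0x12000.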

  if (base.IsPC()) {
    // Special handling for PC bases. We must read the PC in the first
    // instruction (and only in that instruction), and we must also take care to
    // keep the same address calculation as loads and stores. For T32, that
    // means using something like ADR, which uses AlignDown(PC, 4).

    // We don't handle positive offsets from PC because the intention is not
    // clear; does the user expect the offset from the current
    // GetCursorOffset(), or to allow a certain amount of space after the
    // instruction?
    VIXL_ASSERT((offset & 0x80000000) != 0);
    if (IsUsingT32()) {
      // T32: make the first instruction "SUB (immediate, from PC)" -- an alias
      // of ADR -- to get behaviour like loads and stores. This ADR can handle
      // at least as much offset as the load_store_offset so it can replace it.

      uint32_t sub_pc_offset = (-offset) & 0xfff;
      load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
      add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;

      ExactAssemblyScope scope(this, k32BitT32InstructionSizeInBytes);
      sub(cond, scratch, base, sub_pc_offset);

      if (add_offset == 0) return MemOperand(scratch, load_store_offset);

      // The rest of the offset can be generated in the usual way.
      base = scratch;
    }
    // A32 can use any SUB instruction, so we don't have to do anything special
    // here except to ensure that we read the PC first.
  }

  add(cond, scratch, base, add_offset);
  return MemOperand(scratch, load_store_offset);
}


uint32_t MacroAssembler::GetOffsetMask(InstructionType type,
                                       AddrMode addrmode) {
  switch (type) {
    case kLdr:
    case kLdrb:
    case kStr:
    case kStrb:
      if (IsUsingA32() || (addrmode == Offset)) {
        return 0xfff;
      } else {
        return 0xff;
      }
    case kLdrsb:
    case kLdrh:
    case kLdrsh:
    case kStrh:
      if (IsUsingT32() && (addrmode == Offset)) {
        return 0xfff;
      } else {
        return 0xff;
      }
    case kVldr:
    case kVstr:
      return 0x3fc;
    case kLdrd:
    case kStrd:
      if (IsUsingA32()) {
        return 0xff;
      } else {
        return 0x3fc;
      }
    default:
      VIXL_UNREACHABLE();
      return 0;
  }
}
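
// For example, VLDR and VSTR encode an 8-bit immediate scaled by 4, hence the
// 0x3fc mask above. A sketch of the resulting expansion in
// MemOperandComputationHelper: a VLDR offset of 0x400 exceeds the mask, so it
// is folded into the scratch base register:
//   add  scratch, base, #0x400
//   vldr d0, [scratch]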


HARDFLOAT void PrintfTrampolineRRRR(
    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRRRD(
    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRRDR(
    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRRDD(
    const char* format, uint32_t a, uint32_t b, double c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRDRR(
    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRDRD(
    const char* format, uint32_t a, double b, uint32_t c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRDDR(
    const char* format, uint32_t a, double b, double c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRDDD(
    const char* format, uint32_t a, double b, double c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDRRR(
    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDRRD(
    const char* format, double a, uint32_t b, uint32_t c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDRDR(
    const char* format, double a, uint32_t b, double c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDRDD(
    const char* format, double a, uint32_t b, double c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDDRR(
    const char* format, double a, double b, uint32_t c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDDRD(
    const char* format, double a, double b, uint32_t c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDDDR(
    const char* format, double a, double b, double c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDDDD(
    const char* format, double a, double b, double c, double d) {
  printf(format, a, b, c, d);
}

void MacroAssembler::Printf(const char* format,
                            CPURegister reg1,
                            CPURegister reg2,
                            CPURegister reg3,
                            CPURegister reg4) {
  // Exclude all registers from the available scratch registers, so
  // that we are able to use ip below.
  // TODO: Refactor this function to use UseScratchRegisterScope
  // for temporary registers below.
  UseScratchRegisterScope scratch(this);
  scratch.ExcludeAll();
  if (generate_simulator_code_) {
    PushRegister(reg4);
    PushRegister(reg3);
    PushRegister(reg2);
    PushRegister(reg1);
    Push(RegisterList(r0, r1));
    StringLiteral* format_literal =
        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
    Adr(r0, format_literal);
    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
                    (reg2.GetType() << 4) | reg1.GetType();
    Mov(r1, args);
    Hvc(kPrintfCode);
    Pop(RegisterList(r0, r1));
    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
    Drop(size);
  } else {
    // Generating code for a native platform => 32-bit environment.
    // Preserve the caller-saved core registers (r0-r3, r12, r14), plus r5,
    // which is used below to hold the stack alignment adjustment.
    const uint32_t saved_registers_mask =
        kCallerSavedRegistersMask | (1 << r5.GetCode());
    Push(RegisterList(saved_registers_mask));
    // Push VFP registers.
    Vpush(Untyped64, DRegisterList(d0, 8));
    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
    // Find one register which has been saved and which doesn't need to be
    // printed.
    RegisterList available_registers(kCallerSavedRegistersMask);
    if (reg1.GetType() == CPURegister::kRRegister) {
      available_registers.Remove(Register(reg1.GetCode()));
    }
    if (reg2.GetType() == CPURegister::kRRegister) {
      available_registers.Remove(Register(reg2.GetCode()));
    }
    if (reg3.GetType() == CPURegister::kRRegister) {
      available_registers.Remove(Register(reg3.GetCode()));
    }
    if (reg4.GetType() == CPURegister::kRRegister) {
      available_registers.Remove(Register(reg4.GetCode()));
    }
    Register tmp = available_registers.GetFirstAvailableRegister();
    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
    // Push the flags.
    Mrs(tmp, APSR);
    Push(tmp);
    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
    Push(tmp);
    // Push the registers to print on the stack.
    PushRegister(reg4);
    PushRegister(reg3);
    PushRegister(reg2);
    PushRegister(reg1);
    int core_count = 1;
    int vfp_count = 0;
    uint32_t printf_type = 0;
    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
    // Reg4 may stay on the stack if all the registers to print are core
    // registers.
    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
    // Ensure that the stack is aligned on 8 bytes.
    And(r5, sp, 0x7);
    if (core_count == 5) {
      // One 32-bit argument (reg4) has been left on the stack => align the
      // stack before that argument.
      Pop(r0);
      Sub(sp, sp, r5);
      Push(r0);
    } else {
      Sub(sp, sp, r5);
    }
    // Select the right trampoline depending on the arguments.
    uintptr_t address;
    switch (printf_type) {
      case 0:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
        break;
      case 1:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
        break;
      case 2:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
        break;
      case 3:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
        break;
      case 4:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
        break;
      case 5:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
        break;
      case 6:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
        break;
      case 7:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
        break;
      case 8:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
        break;
      case 9:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
        break;
      case 10:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
        break;
      case 11:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
        break;
      case 12:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
        break;
      case 13:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
        break;
      case 14:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
        break;
      case 15:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
        break;
      default:
        VIXL_UNREACHABLE();
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
        break;
    }
    StringLiteral* format_literal =
        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
    Adr(r0, format_literal);
    Mov(ip, Operand::From(address));
    Blx(ip);
    // If reg4 was left on the stack, skip over it.
    if (core_count == 5) Drop(kRegSizeInBytes);
    // Restore the stack as it was before alignment.
    Add(sp, sp, r5);
    // Restore the flags.
    Pop(tmp);
    Vmsr(FPSCR, tmp);
    Pop(tmp);
    Msr(APSR_nzcvqg, tmp);
    // Restore the registers.
    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
    Vpop(Untyped64, DRegisterList(d0, 8));
    Pop(RegisterList(saved_registers_mask));
  }
}
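
// A typical use of Printf (a sketch; `masm` is an assumed MacroAssembler
// pointer): print a core register and a VFP register without clobbering any
// architectural state.
//   masm->Printf("x = %d, d = %g\n", r0, d0);
// The format string is emitted in the literal pool, and the flags as well as
// the caller-saved core and VFP registers are preserved around the call.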


void MacroAssembler::PushRegister(CPURegister reg) {
  switch (reg.GetType()) {
    case CPURegister::kNoRegister:
      break;
    case CPURegister::kRRegister:
      Push(Register(reg.GetCode()));
      break;
    case CPURegister::kSRegister:
      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
      break;
    case CPURegister::kDRegister:
      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
      break;
    case CPURegister::kQRegister:
      VIXL_UNIMPLEMENTED();
      break;
  }
}


void MacroAssembler::PreparePrintfArgument(CPURegister reg,
                                           int* core_count,
                                           int* vfp_count,
                                           uint32_t* printf_type) {
  switch (reg.GetType()) {
    case CPURegister::kNoRegister:
      break;
    case CPURegister::kRRegister:
      VIXL_ASSERT(*core_count <= 4);
      if (*core_count < 4) Pop(Register(*core_count));
      *core_count += 1;
      break;
    case CPURegister::kSRegister:
      VIXL_ASSERT(*vfp_count < 4);
      *printf_type |= 1 << (*core_count + *vfp_count - 1);
      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
      *vfp_count += 1;
      break;
    case CPURegister::kDRegister:
      VIXL_ASSERT(*vfp_count < 4);
      *printf_type |= 1 << (*core_count + *vfp_count - 1);
      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
      *vfp_count += 1;
      break;
    case CPURegister::kQRegister:
      VIXL_UNIMPLEMENTED();
      break;
  }
}


void MacroAssembler::Delegate(InstructionType type,
                              InstructionCondROp instruction,
                              Condition cond,
                              Register rn,
                              const Operand& operand) {
  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
              (type == kUxtb16));

  if (type == kMovt) {
    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.\n");
  }

  // This delegate only supports teq with immediates.
  CONTEXT_SCOPE;
  if ((type == kTeq) && operand.IsImmediate()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
    teq(cond, rn, scratch);
    return;
  }
  Assembler::Delegate(type, instruction, cond, rn, operand);
}


void MacroAssembler::Delegate(InstructionType type,
                              InstructionCondSizeROp instruction,
                              Condition cond,
                              EncodingSize size,
                              Register rn,
                              const Operand& operand) {
  CONTEXT_SCOPE;
  VIXL_ASSERT(size.IsBest());
  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
              (type == kUxtb) || (type == kUxth));
  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
    VIXL_ASSERT((type != kMov) && (type != kMovs));
    InstructionCondRROp shiftop = NULL;
    switch (operand.GetShift().GetType()) {
      case LSL:
        shiftop = &Assembler::lsl;
        break;
      case LSR:
        shiftop = &Assembler::lsr;
        break;
      case ASR:
        shiftop = &Assembler::asr;
        break;
      case RRX:
        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
        VIXL_UNREACHABLE();
        break;
      case ROR:
        shiftop = &Assembler::ror;
        break;
      default:
        VIXL_UNREACHABLE();
    }
    if (shiftop != NULL) {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
      (this->*shiftop)(cond,
                       scratch,
                       operand.GetBaseRegister(),
                       operand.GetShiftRegister());
      (this->*instruction)(cond, size, rn, scratch);
      return;
    }
  }
  if (operand.IsImmediate()) {
    uint32_t imm = operand.GetImmediate();
    switch (type) {
      case kMov:
      case kMovs:
        if (!rn.IsPC()) {
          // Immediate is too large, but not using PC, so handle with mov{t}.
          HandleOutOfBoundsImmediate(cond, rn, imm);
          if (type == kMovs) {
            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
            tst(cond, rn, rn);
          }
          return;
        } else if (type == kMov) {
          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
          // Immediate is too large and using PC, so handle using a temporary
          // register.
          UseScratchRegisterScope temps(this);
          Register scratch = temps.Acquire();
          HandleOutOfBoundsImmediate(al, scratch, imm);
          EnsureEmitFor(kMaxInstructionSizeInBytes);
          bx(cond, scratch);
          return;
        }
        break;
      case kCmn:
      case kCmp:
        if (IsUsingA32() || !rn.IsPC()) {
          UseScratchRegisterScope temps(this);
          Register scratch = temps.Acquire();
          HandleOutOfBoundsImmediate(cond, scratch, imm);
          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
          (this->*instruction)(cond, size, rn, scratch);
          return;
        }
        break;
      case kMvn:
      case kMvns:
        if (!rn.IsPC()) {
          UseScratchRegisterScope temps(this);
          Register scratch = temps.Acquire();
          HandleOutOfBoundsImmediate(cond, scratch, imm);
          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
          (this->*instruction)(cond, size, rn, scratch);
          return;
        }
        break;
      case kTst:
        if (IsUsingA32() || !rn.IsPC()) {
          UseScratchRegisterScope temps(this);
          Register scratch = temps.Acquire();
          HandleOutOfBoundsImmediate(cond, scratch, imm);
          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
          (this->*instruction)(cond, size, rn, scratch);
          return;
        }
        break;
      default:  // kSxtb, Sxth, Uxtb, Uxth
        break;
    }
  }
  Assembler::Delegate(type, instruction, cond, size, rn, operand);
}
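
// For example (a sketch; `masm` is an assumed MacroAssembler pointer), moving
// an immediate with no mov/mvn encoding, such as
//   masm->Mov(r0, 0xabcdef12);
// is delegated here and handled by the kMov case above, which emits a
// mov/movt pair via HandleOutOfBoundsImmediate.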


void MacroAssembler::Delegate(InstructionType type,
                              InstructionCondRROp instruction,
                              Condition cond,
                              Register rd,
                              Register rn,
                              const Operand& operand) {
  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
      (type == kPkhbt) || (type == kPkhtb)) {
    UnimplementedDelegate(type);
    return;
  }

  // This delegate only handles the following instructions.
  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
              (type == kRscs));
  CONTEXT_SCOPE;

  // T32 does not support register-shifted-register operands, so emulate them.
  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
    InstructionCondRROp shiftop = NULL;
    switch (operand.GetShift().GetType()) {
      case LSL:
        shiftop = &Assembler::lsl;
        break;
      case LSR:
        shiftop = &Assembler::lsr;
        break;
      case ASR:
        shiftop = &Assembler::asr;
        break;
      case RRX:
        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
        VIXL_UNREACHABLE();
        break;
      case ROR:
        shiftop = &Assembler::ror;
        break;
      default:
        VIXL_UNREACHABLE();
    }
    if (shiftop != NULL) {
      UseScratchRegisterScope temps(this);
      Register rm = operand.GetBaseRegister();
      Register rs = operand.GetShiftRegister();
      // Try to use rd as a scratch register. We can do this if it aliases rs or
      // rm (because we read them in the first instruction), but not rn.
      if (!rd.Is(rn)) temps.Include(rd);
      Register scratch = temps.Acquire();
      // TODO: The scope length was measured empirically. We should analyse the
      // worst-case size and add targeted tests.
      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
      (this->*shiftop)(cond, scratch, rm, rs);
      (this->*instruction)(cond, rd, rn, scratch);
      return;
    }
  }

  // T32 does not have an Rsc instruction; invert the lhs input and turn it
  // into an Adc. Adc and Rsc are equivalent via a bitwise NOT:
  //   adc rd, rn, operand <-> rsc rd, NOT(rn), operand
  // since NOT(rn) == -rn - 1, so the adc computes operand - rn - 1 + carry,
  // which is exactly what rsc computes.
  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
    // The RegisterShiftedRegister case should have been handled above.
    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
    UseScratchRegisterScope temps(this);
    // Try to use rd as a scratch register. We can do this if it aliases rn
    // (because we read it in the first instruction), but not rm.
    temps.Include(rd);
    temps.Exclude(operand);
    Register negated_rn = temps.Acquire();
    {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      mvn(cond, negated_rn, rn);
    }
    if (type == kRsc) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      adc(cond, rd, negated_rn, operand);
      return;
    }
    // TODO: We shouldn't have to specify how much space the next instruction
    // needs.
    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
    adcs(cond, rd, negated_rn, operand);
    return;
  }

  if (operand.IsImmediate()) {
    // If the immediate can be encoded when inverted, turn Orn into Orr.
    // Otherwise rely on HandleOutOfBoundsImmediate to materialise the
    // immediate with a mov/movt sequence.
    int32_t imm = operand.GetSignedImmediate();
    if (((type == kOrn) || (type == kOrns)) && IsModifiedImmediate(~imm)) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      switch (type) {
        case kOrn:
          orr(cond, rd, rn, ~imm);
          return;
        case kOrns:
          orrs(cond, rd, rn, ~imm);
          return;
        default:
          VIXL_UNREACHABLE();
          break;
      }
    }
  }

  // A32 does not have an Orn instruction; invert the rhs input and turn it
  // into an Orr.
  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
    // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable
    //  mvn r0, r2
    //  orr r0, r1, r0
    Register scratch;
    UseScratchRegisterScope temps(this);
    // Try to use rd as a scratch register. We can do this if it aliases the
    // operand's registers (because the mvn reads them first), but not rn.
    if (!rd.Is(rn)) temps.Include(rd);
    scratch = temps.Acquire();
    {
      // TODO: We shouldn't have to specify how much space the next instruction
      // needs.
      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
      mvn(cond, scratch, operand);
    }
    if (type == kOrns) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      orrs(cond, rd, rn, scratch);
      return;
    }
    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
    orr(cond, rd, rn, scratch);
    return;
  }

  if (operand.IsImmediate()) {
    UseScratchRegisterScope temps(this);
    // Allow using the destination as a scratch register if possible.
    if (!rd.Is(rn)) temps.Include(rd);
    Register scratch = temps.Acquire();
    int32_t imm = operand.GetSignedImmediate();
    HandleOutOfBoundsImmediate(cond, scratch, imm);
    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
    (this->*instruction)(cond, rd, rn, scratch);
    return;
  }
  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
}
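
// For example (a sketch), on A32 `Orn(r0, r1, r2)` has no direct encoding
// and expands, via the code above, to:
//   mvn r0, r2
//   orr r0, r1, r0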


void MacroAssembler::Delegate(InstructionType type,
                              InstructionCondSizeRL instruction,
                              Condition cond,
                              EncodingSize size,
                              Register rd,
                              Label* label) {
  VIXL_ASSERT((type == kLdr) || (type == kAdr));

  CONTEXT_SCOPE;
  VIXL_ASSERT(size.IsBest());

  if ((type == kLdr) && label->IsBound()) {
    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
    UseScratchRegisterScope temps(this);
    temps.Include(rd);
    uint32_t mask = GetOffsetMask(type, Offset);
    ldr(rd, MemOperandComputationHelper(cond, temps.Acquire(), label, mask));
    return;
  }

  Assembler::Delegate(type, instruction, cond, size, rd, label);
}


bool MacroAssembler::GenerateSplitInstruction(
    InstructionCondSizeRROp instruction,
    Condition cond,
    Register rd,
    Register rn,
    uint32_t imm,
    uint32_t mask) {
  uint32_t high = imm & ~mask;
  if (!IsModifiedImmediate(high) && !rn.IsPC()) return false;
  // If high is a modified immediate, we can perform the operation with
  // only 2 instructions.
  // Otherwise, if rn is PC, we want to avoid moving PC into a temporary.
  // Therefore, we also use this pattern even if the second call may
  // generate 3 instructions.
  uint32_t low = imm & mask;
  CodeBufferCheckScope scope(this,
                             (rn.IsPC() ? 4 : 2) * kMaxInstructionSizeInBytes);
  (this->*instruction)(cond, Best, rd, rn, low);
  (this->*instruction)(cond, Best, rd, rd, high);
  return true;
}
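
// For example (a sketch), on T32 `Add(r0, r1, 0x12345)` can be split with
// mask == 0xfff into low == 0x345 and high == 0x12000, giving:
//   add r0, r1, #0x345
//   add r0, r0, #0x12000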
   1291 
   1292 
   1293 void MacroAssembler::Delegate(InstructionType type,
   1294                               InstructionCondSizeRROp instruction,
   1295                               Condition cond,
   1296                               EncodingSize size,
   1297                               Register rd,
   1298                               Register rn,
   1299                               const Operand& operand) {
   1300   VIXL_ASSERT(
   1301       (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
   1302       (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
   1303       (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
   1304       (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
   1305       (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
   1306       (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
   1307       (type == kSub) || (type == kSubs));
   1308 
   1309   CONTEXT_SCOPE;
   1310   VIXL_ASSERT(size.IsBest());
   1311   if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
   1312     InstructionCondRROp shiftop = NULL;
   1313     switch (operand.GetShift().GetType()) {
   1314       case LSL:
   1315         shiftop = &Assembler::lsl;
   1316         break;
   1317       case LSR:
   1318         shiftop = &Assembler::lsr;
   1319         break;
   1320       case ASR:
   1321         shiftop = &Assembler::asr;
   1322         break;
   1323       case RRX:
   1324         // A RegisterShiftedRegister operand cannot have a shift of type RRX.
   1325         VIXL_UNREACHABLE();
   1326         break;
   1327       case ROR:
   1328         shiftop = &Assembler::ror;
   1329         break;
   1330       default:
   1331         VIXL_UNREACHABLE();
   1332     }
   1333     if (shiftop != NULL) {
   1334       UseScratchRegisterScope temps(this);
   1335       Register rm = operand.GetBaseRegister();
   1336       Register rs = operand.GetShiftRegister();
   1337       // Try to use rd as a scratch register. We can do this if it aliases rs or
   1338       // rm (because we read them in the first instruction), but not rn.
   1339       if (!rd.Is(rn)) temps.Include(rd);
   1340       Register scratch = temps.Acquire();
   1341       CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
   1342       (this->*shiftop)(cond, scratch, rm, rs);
   1343       (this->*instruction)(cond, size, rd, rn, scratch);
   1344       return;
   1345     }
   1346   }
   1347   if (operand.IsImmediate()) {
   1348     int32_t imm = operand.GetSignedImmediate();
   1349     if (ImmediateT32::IsImmediateT32(~imm)) {
   1350       if (IsUsingT32()) {
   1351         switch (type) {
   1352           case kOrr:
   1353             orn(cond, rd, rn, ~imm);
   1354             return;
   1355           case kOrrs:
   1356             orns(cond, rd, rn, ~imm);
   1357             return;
   1358           default:
   1359             break;
   1360         }
   1361       }
   1362     }
   1363     if (imm < 0) {
   1364       InstructionCondSizeRROp asmcb = NULL;
   1365       // Add and sub are equivalent using an arithmetic negation:
   1366       //   add rd, rn, #imm <-> sub rd, rn, - #imm
   1367       // Add and sub with carry are equivalent using a bitwise NOT:
   1368       //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
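      // For example (sketch): Add(r0, r1, -4) is emitted as sub r0, r1, #4,
      // and Adc(r0, r1, -1) as sbc r0, r1, #0.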
   1369       switch (type) {
   1370         case kAdd:
   1371           asmcb = &Assembler::sub;
   1372           imm = -imm;
   1373           break;
   1374         case kAdds:
   1375           asmcb = &Assembler::subs;
   1376           imm = -imm;
   1377           break;
   1378         case kSub:
   1379           asmcb = &Assembler::add;
   1380           imm = -imm;
   1381           break;
   1382         case kSubs:
   1383           asmcb = &Assembler::adds;
   1384           imm = -imm;
   1385           break;
   1386         case kAdc:
   1387           asmcb = &Assembler::sbc;
   1388           imm = ~imm;
   1389           break;
   1390         case kAdcs:
   1391           asmcb = &Assembler::sbcs;
   1392           imm = ~imm;
   1393           break;
   1394         case kSbc:
   1395           asmcb = &Assembler::adc;
   1396           imm = ~imm;
   1397           break;
   1398         case kSbcs:
   1399           asmcb = &Assembler::adcs;
   1400           imm = ~imm;
   1401           break;
   1402         default:
   1403           break;
   1404       }
   1405       if (asmcb != NULL) {
   1406         CodeBufferCheckScope scope(this, 4 * kMaxInstructionSizeInBytes);
   1407         (this->*asmcb)(cond, size, rd, rn, Operand(imm));
   1408         return;
   1409       }
   1410     }
   1411 
   1412     // When rn is PC, only handle negative offsets. The correct way to handle
   1413     // positive offsets isn't clear; does the user want the offset from the
   1414     // start of the macro, or from the end (to allow a certain amount of space)?
   1415     // When type is Add or Sub, imm is always positive (imm < 0 has just been
   1416     // handled and imm == 0 would have been generated without the need of a
   1417     // delegate). Therefore, only add to PC is forbidden here.
   1418     if ((((type == kAdd) && !rn.IsPC()) || (type == kSub)) &&
   1419         (IsUsingA32() || (!rd.IsPC() && !rn.IsPC()))) {
   1420       VIXL_ASSERT(imm > 0);
   1421       // Try to break the constant into two modified immediates.
   1422       // For T32 also try to break the constant into one imm12 and one modified
   1423       // immediate. Count the trailing zeroes and get the biggest even value.
   1424       int trailing_zeroes = CountTrailingZeros(imm) & ~1u;
   1425       uint32_t mask = ((trailing_zeroes < 4) && IsUsingT32())
   1426                           ? 0xfff
   1427                           : (0xff << trailing_zeroes);
   1428       if (GenerateSplitInstruction(instruction, cond, rd, rn, imm, mask)) {
   1429         return;
   1430       }
   1431       InstructionCondSizeRROp asmcb = NULL;
   1432       switch (type) {
   1433         case kAdd:
   1434           asmcb = &Assembler::sub;
   1435           break;
   1436         case kSub:
   1437           asmcb = &Assembler::add;
   1438           break;
   1439         default:
   1440           VIXL_UNREACHABLE();
   1441       }
   1442       if (GenerateSplitInstruction(asmcb, cond, rd, rn, -imm, mask)) {
   1443         return;
   1444       }
   1445     }
   1446 
   1447     UseScratchRegisterScope temps(this);
   1448     // Allow using the destination as a scratch register if possible.
   1449     if (!rd.Is(rn)) temps.Include(rd);
   1450     if (rn.IsPC()) {
   1451       // If we're reading the PC, we need to do it in the first instruction,
   1452       // otherwise we'll read the wrong value. We rely on this to handle the
   1453       // long-range PC-relative MemOperands which can result from user-managed
   1454       // literals.
   1455 
   1456       // Only handle negative offsets. The correct way to handle positive
   1457       // offsets isn't clear; does the user want the offset from the start of
   1458       // the macro, or from the end (to allow a certain amount of space)?
   1459       bool offset_is_negative_or_zero = (imm <= 0);
   1460       switch (type) {
   1461         case kAdd:
   1462         case kAdds:
   1463           offset_is_negative_or_zero = (imm <= 0);
   1464           break;
   1465         case kSub:
   1466         case kSubs:
   1467           offset_is_negative_or_zero = (imm >= 0);
   1468           break;
   1469         case kAdc:
   1470         case kAdcs:
   1471           offset_is_negative_or_zero = (imm < 0);
   1472           break;
   1473         case kSbc:
   1474         case kSbcs:
   1475           offset_is_negative_or_zero = (imm > 0);
   1476           break;
   1477         default:
   1478           break;
   1479       }
   1480       if (offset_is_negative_or_zero) {
   1481         {
   1482           rn = temps.Acquire();
   1483           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1484           mov(cond, rn, pc);
   1485         }
   1486         // Recurse rather than falling through, to try to get the immediate into
   1487         // a single instruction.
   1488         CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   1489         (this->*instruction)(cond, size, rd, rn, operand);
   1490         return;
   1491       }
   1492     } else {
   1493       Register scratch = temps.Acquire();
   1494       // TODO: The scope length was measured empirically. We should analyse the
      // worst-case size and add targeted tests.
   1496       CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   1497       mov(cond, scratch, operand.GetImmediate());
   1498       (this->*instruction)(cond, size, rd, rn, scratch);
   1499       return;
   1500     }
   1501   }
   1502   Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
   1503 }
   1504 
   1505 
   1506 void MacroAssembler::Delegate(InstructionType type,
   1507                               InstructionRL instruction,
   1508                               Register rn,
   1509                               Label* label) {
   1510   VIXL_ASSERT((type == kCbz) || (type == kCbnz));
   1511 
   1512   CONTEXT_SCOPE;
   1513   CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
   1514   if (IsUsingA32()) {
   1515     if (type == kCbz) {
   1516       VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
   1517     } else {
   1518       VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
   1519     }
   1520   } else if (rn.IsLow()) {
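    // The label is out of range for cbz/cbnz, which can only branch forward
    // by 0 to 126 bytes, so invert the test and skip over an unconditional
    // branch with a much larger range. Sketch:
    //   Cbnz(r0, &far)  ->  cbz r0, done
    //                       b far
    //                     done: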
   1521     switch (type) {
   1522       case kCbnz: {
   1523         Label done;
   1524         cbz(rn, &done);
   1525         b(label);
   1526         Bind(&done);
   1527         return;
   1528       }
   1529       case kCbz: {
   1530         Label done;
   1531         cbnz(rn, &done);
   1532         b(label);
   1533         Bind(&done);
   1534         return;
   1535       }
   1536       default:
   1537         break;
   1538     }
   1539   }
   1540   Assembler::Delegate(type, instruction, rn, label);
   1541 }
   1542 
   1543 
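// Returns true if every byte of `imm` is either 0x00 or 0xff, so that `imm`
// is encodable as a NEON I64 immediate. For example, 0xff0000ff qualifies,
// but 0xff0000ab does not.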
   1544 template <typename T>
   1545 static inline bool IsI64BitPattern(T imm) {
  for (T mask = static_cast<T>(0xff) << ((sizeof(T) - 1) * 8); mask != 0;
       mask >>= 8) {
   1547     if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
   1548   }
   1549   return true;
   1550 }
   1551 
   1552 
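// Returns true if all bytes of `imm` equal its lowest byte, so that `imm` is
// a repeated 8-bit pattern. For example, 0xabababab qualifies, but
// 0xabababcd does not.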
   1553 template <typename T>
   1554 static inline bool IsI8BitPattern(T imm) {
   1555   uint8_t imm8 = imm & 0xff;
   1556   for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
   1557     imm >>= 8;
   1558     if ((imm & 0xff) != imm8) return false;
   1559   }
   1560   return true;
   1561 }
   1562 
   1563 
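// Returns true if `imm32` matches one of the byte patterns annotated below,
// in which case vmvn with the inverted immediate can be used instead of a
// longer move sequence.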
   1564 static inline bool CanBeInverted(uint32_t imm32) {
   1565   uint32_t fill8 = 0;
   1566 
   1567   if ((imm32 & 0xffffff00) == 0xffffff00) {
   1568     //    11111111 11111111 11111111 abcdefgh
   1569     return true;
   1570   }
   1571   if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
   1572     fill8 = imm32 & 0xff;
   1573     imm32 >>= 8;
   1574     if ((imm32 >> 8) == 0xffff) {
   1575       //    11111111 11111111 abcdefgh 00000000
   1576       // or 11111111 11111111 abcdefgh 11111111
   1577       return true;
   1578     }
   1579     if ((imm32 & 0xff) == fill8) {
   1580       imm32 >>= 8;
   1581       if ((imm32 >> 8) == 0xff) {
   1582         //    11111111 abcdefgh 00000000 00000000
   1583         // or 11111111 abcdefgh 11111111 11111111
   1584         return true;
   1585       }
   1586       if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
   1587         //    abcdefgh 11111111 11111111 11111111
   1588         return true;
   1589       }
   1590     }
   1591   }
   1592   return false;
   1593 }
   1594 
   1595 
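// Replicates `imm` across the whole of the wider type RES. For example,
// replicate<uint64_t>(static_cast<uint32_t>(0xff0000ff)) yields
// 0xff0000ffff0000ff.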
   1596 template <typename RES, typename T>
   1597 static inline RES replicate(T imm) {
   1598   VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
   1599               (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
   1600   RES res = imm;
   1601   for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
   1602     res = (res << (sizeof(T) * 8)) | imm;
   1603   }
   1604   return res;
   1605 }
   1606 
   1607 
   1608 void MacroAssembler::Delegate(InstructionType type,
   1609                               InstructionCondDtSSop instruction,
   1610                               Condition cond,
   1611                               DataType dt,
   1612                               SRegister rd,
   1613                               const SOperand& operand) {
   1614   CONTEXT_SCOPE;
   1615   if (type == kVmov) {
   1616     if (operand.IsImmediate() && dt.Is(F32)) {
   1617       const NeonImmediate& neon_imm = operand.GetNeonImmediate();
   1618       if (neon_imm.CanConvert<float>()) {
        // movw ip, imm16
        // movt ip, imm16
        // vmov s0, ip
   1622         UseScratchRegisterScope temps(this);
   1623         Register scratch = temps.Acquire();
   1624         float f = neon_imm.GetImmediate<float>();
        // TODO: The scope length was measured empirically. We should analyse
        // the worst-case size and add targeted tests.
   1628         CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   1629         mov(cond, scratch, FloatToRawbits(f));
   1630         vmov(cond, rd, scratch);
   1631         return;
   1632       }
   1633     }
   1634   }
   1635   Assembler::Delegate(type, instruction, cond, dt, rd, operand);
   1636 }
   1637 
   1638 
   1639 void MacroAssembler::Delegate(InstructionType type,
   1640                               InstructionCondDtDDop instruction,
   1641                               Condition cond,
   1642                               DataType dt,
   1643                               DRegister rd,
   1644                               const DOperand& operand) {
   1645   CONTEXT_SCOPE;
   1646   if (type == kVmov) {
   1647     if (operand.IsImmediate()) {
   1648       const NeonImmediate& neon_imm = operand.GetNeonImmediate();
   1649       switch (dt.GetValue()) {
   1650         case I32:
   1651           if (neon_imm.CanConvert<uint32_t>()) {
   1652             uint32_t imm = neon_imm.GetImmediate<uint32_t>();
   1653             // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
   1654             if (IsI8BitPattern(imm)) {
   1655               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1656               vmov(cond, I8, rd, imm & 0xff);
   1657               return;
   1658             }
   1659             // vmov.i32 d0, 0xff0000ff will translate into
   1660             // vmov.i64 d0, 0xff0000ffff0000ff
   1661             if (IsI64BitPattern(imm)) {
   1662               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1663               vmov(cond, I64, rd, replicate<uint64_t>(imm));
   1664               return;
   1665             }
   1666             // vmov.i32 d0, 0xffab0000 will translate into
   1667             // vmvn.i32 d0, 0x0054ffff
   1668             if (cond.Is(al) && CanBeInverted(imm)) {
   1669               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1670               vmvn(I32, rd, ~imm);
   1671               return;
   1672             }
   1673           }
   1674           break;
   1675         case I16:
   1676           if (neon_imm.CanConvert<uint16_t>()) {
   1677             uint16_t imm = neon_imm.GetImmediate<uint16_t>();
   1678             // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
   1679             if (IsI8BitPattern(imm)) {
   1680               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1681               vmov(cond, I8, rd, imm & 0xff);
   1682               return;
   1683             }
   1684           }
   1685           break;
   1686         case I64:
   1687           if (neon_imm.CanConvert<uint64_t>()) {
   1688             uint64_t imm = neon_imm.GetImmediate<uint64_t>();
   1689             // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
   1690             if (IsI8BitPattern(imm)) {
   1691               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1692               vmov(cond, I8, rd, imm & 0xff);
   1693               return;
   1694             }
   1695             // mov ip, lo(imm64)
   1696             // vdup d0, ip
            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
   1698             // preserved
   1699             {
   1700               UseScratchRegisterScope temps(this);
   1701               Register scratch = temps.Acquire();
   1702               {
                // TODO: The scope length was measured empirically. We should
                // analyse the worst-case size and add targeted tests.
   1706                 CodeBufferCheckScope scope(this,
   1707                                            2 * kMaxInstructionSizeInBytes);
   1708                 mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
   1709               }
   1710               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1711               vdup(cond, Untyped32, rd, scratch);
   1712             }
   1713             // mov ip, hi(imm64)
   1714             // vmov d0[1], ip
   1715             {
   1716               UseScratchRegisterScope temps(this);
   1717               Register scratch = temps.Acquire();
   1718               {
                // TODO: The scope length was measured empirically. We should
                // analyse the worst-case size and add targeted tests.
   1722                 CodeBufferCheckScope scope(this,
   1723                                            2 * kMaxInstructionSizeInBytes);
   1724                 mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
   1725               }
   1726               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1727               vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
   1728             }
   1729             return;
   1730           }
   1731           break;
   1732         default:
   1733           break;
   1734       }
   1735       VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
   1736       if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
   1737         // mov ip, imm32
   1738         // vdup.16 d0, ip
   1739         UseScratchRegisterScope temps(this);
   1740         Register scratch = temps.Acquire();
   1741         {
   1742           CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
   1743           mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
   1744         }
   1745         DataTypeValue vdup_dt = Untyped32;
   1746         switch (dt.GetValue()) {
   1747           case I16:
   1748             vdup_dt = Untyped16;
   1749             break;
   1750           case I32:
   1751             vdup_dt = Untyped32;
   1752             break;
   1753           default:
   1754             VIXL_UNREACHABLE();
   1755         }
   1756         CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1757         vdup(cond, vdup_dt, rd, scratch);
   1758         return;
   1759       }
   1760       if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
   1761         float f = neon_imm.GetImmediate<float>();
   1762         // Punt to vmov.i32
   1763         // TODO: The scope length was guessed based on the double case below. We
        // should analyse the worst-case size and add targeted tests.
   1765         CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   1766         vmov(cond, I32, rd, FloatToRawbits(f));
   1767         return;
   1768       }
   1769       if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
   1770         // Punt to vmov.i64
   1771         double d = neon_imm.GetImmediate<double>();
        // TODO: The scope length was measured empirically. We should analyse
        // the worst-case size and add targeted tests.
   1775         CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
   1776         vmov(cond, I64, rd, DoubleToRawbits(d));
   1777         return;
   1778       }
   1779     }
   1780   }
   1781   Assembler::Delegate(type, instruction, cond, dt, rd, operand);
   1782 }
   1783 
   1784 
   1785 void MacroAssembler::Delegate(InstructionType type,
   1786                               InstructionCondDtQQop instruction,
   1787                               Condition cond,
   1788                               DataType dt,
   1789                               QRegister rd,
   1790                               const QOperand& operand) {
   1791   CONTEXT_SCOPE;
   1792   if (type == kVmov) {
   1793     if (operand.IsImmediate()) {
   1794       const NeonImmediate& neon_imm = operand.GetNeonImmediate();
   1795       switch (dt.GetValue()) {
   1796         case I32:
   1797           if (neon_imm.CanConvert<uint32_t>()) {
   1798             uint32_t imm = neon_imm.GetImmediate<uint32_t>();
   1799             // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
   1800             if (IsI8BitPattern(imm)) {
   1801               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1802               vmov(cond, I8, rd, imm & 0xff);
   1803               return;
   1804             }
   1805             // vmov.i32 d0, 0xff0000ff will translate into
   1806             // vmov.i64 d0, 0xff0000ffff0000ff
   1807             if (IsI64BitPattern(imm)) {
   1808               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1809               vmov(cond, I64, rd, replicate<uint64_t>(imm));
   1810               return;
   1811             }
   1812             // vmov.i32 d0, 0xffab0000 will translate into
   1813             // vmvn.i32 d0, 0x0054ffff
   1814             if (CanBeInverted(imm)) {
   1815               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1816               vmvn(cond, I32, rd, ~imm);
   1817               return;
   1818             }
   1819           }
   1820           break;
   1821         case I16:
   1822           if (neon_imm.CanConvert<uint16_t>()) {
   1823             uint16_t imm = neon_imm.GetImmediate<uint16_t>();
   1824             // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
   1825             if (IsI8BitPattern(imm)) {
   1826               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1827               vmov(cond, I8, rd, imm & 0xff);
   1828               return;
   1829             }
   1830           }
   1831           break;
   1832         case I64:
   1833           if (neon_imm.CanConvert<uint64_t>()) {
   1834             uint64_t imm = neon_imm.GetImmediate<uint64_t>();
   1835             // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
   1836             if (IsI8BitPattern(imm)) {
   1837               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1838               vmov(cond, I8, rd, imm & 0xff);
   1839               return;
   1840             }
   1841             // mov ip, lo(imm64)
   1842             // vdup q0, ip
            // vdup is preferred to 'vmov d0[0]' as d0[1-3] don't need to be
   1844             // preserved
   1845             {
   1846               UseScratchRegisterScope temps(this);
   1847               Register scratch = temps.Acquire();
   1848               {
   1849                 CodeBufferCheckScope scope(this,
   1850                                            2 * kMaxInstructionSizeInBytes);
   1851                 mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
   1852               }
   1853               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1854               vdup(cond, Untyped32, rd, scratch);
   1855             }
   1856             // mov ip, hi(imm64)
   1857             // vmov.i32 d0[1], ip
   1858             // vmov d1, d0
   1859             {
   1860               UseScratchRegisterScope temps(this);
   1861               Register scratch = temps.Acquire();
   1862               {
   1863                 CodeBufferCheckScope scope(this,
   1864                                            2 * kMaxInstructionSizeInBytes);
   1865                 mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
   1866               }
   1867               {
   1868                 CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1869                 vmov(cond,
   1870                      Untyped32,
   1871                      DRegisterLane(rd.GetLowDRegister(), 1),
   1872                      scratch);
   1873               }
   1874               CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1875               vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
   1876             }
   1877             return;
   1878           }
   1879           break;
   1880         default:
   1881           break;
   1882       }
   1883       VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
   1884       if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
   1885         // mov ip, imm32
        // vdup.16 q0, ip
   1887         UseScratchRegisterScope temps(this);
   1888         Register scratch = temps.Acquire();
   1889         {
   1890           CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
   1891           mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
   1892         }
   1893         DataTypeValue vdup_dt = Untyped32;
   1894         switch (dt.GetValue()) {
   1895           case I16:
   1896             vdup_dt = Untyped16;
   1897             break;
   1898           case I32:
   1899             vdup_dt = Untyped32;
   1900             break;
   1901           default:
   1902             VIXL_UNREACHABLE();
   1903         }
   1904         CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   1905         vdup(cond, vdup_dt, rd, scratch);
   1906         return;
   1907       }
   1908       if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
        // Punt to vmov.i32
   1910         float f = neon_imm.GetImmediate<float>();
   1911         CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   1912         vmov(cond, I32, rd, FloatToRawbits(f));
   1913         return;
   1914       }
   1915       if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
   1916         // Use vmov to create the double in the low D register, then duplicate
   1917         // it into the high D register.
   1918         double d = neon_imm.GetImmediate<double>();
   1919         CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes);
   1920         vmov(cond, F64, rd.GetLowDRegister(), d);
   1921         vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
   1922         return;
   1923       }
   1924     }
   1925   }
   1926   Assembler::Delegate(type, instruction, cond, dt, rd, operand);
   1927 }
   1928 
   1929 
   1930 void MacroAssembler::Delegate(InstructionType type,
   1931                               InstructionCondRL instruction,
   1932                               Condition cond,
   1933                               Register rt,
   1934                               Label* label) {
   1935   VIXL_ASSERT((type == kLdrb) || (type == kLdrh) || (type == kLdrsb) ||
   1936               (type == kLdrsh));
   1937 
   1938   CONTEXT_SCOPE;
   1939 
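  // For a bound label, the address is computed PC-relative into a scratch
  // register and the load then uses whatever offset bits the encoding still
  // accepts; MemOperandComputationHelper builds the resulting MemOperand
  // (rough sketch: an address computation, then the load itself).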
   1940   if (label->IsBound()) {
   1941     CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
   1942     UseScratchRegisterScope temps(this);
   1943     temps.Include(rt);
   1944     Register scratch = temps.Acquire();
   1945     uint32_t mask = GetOffsetMask(type, Offset);
   1946     switch (type) {
   1947       case kLdrb:
   1948         ldrb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
   1949         return;
   1950       case kLdrh:
   1951         ldrh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
   1952         return;
   1953       case kLdrsb:
   1954         ldrsb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
   1955         return;
   1956       case kLdrsh:
   1957         ldrsh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
   1958         return;
   1959       default:
   1960         VIXL_UNREACHABLE();
   1961     }
   1962     return;
   1963   }
   1964 
   1965   Assembler::Delegate(type, instruction, cond, rt, label);
   1966 }
   1967 
   1968 
   1969 void MacroAssembler::Delegate(InstructionType type,
   1970                               InstructionCondRRL instruction,
   1971                               Condition cond,
   1972                               Register rt,
   1973                               Register rt2,
   1974                               Label* label) {
   1975   VIXL_ASSERT(type == kLdrd);
   1976 
   1977   CONTEXT_SCOPE;
   1978 
   1979   if (label->IsBound()) {
   1980     CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
   1981     UseScratchRegisterScope temps(this);
   1982     temps.Include(rt, rt2);
   1983     Register scratch = temps.Acquire();
   1984     uint32_t mask = GetOffsetMask(type, Offset);
   1985     ldrd(rt, rt2, MemOperandComputationHelper(cond, scratch, label, mask));
   1986     return;
   1987   }
   1988 
   1989   Assembler::Delegate(type, instruction, cond, rt, rt2, label);
   1990 }
   1991 
   1992 
   1993 void MacroAssembler::Delegate(InstructionType type,
   1994                               InstructionCondSizeRMop instruction,
   1995                               Condition cond,
   1996                               EncodingSize size,
   1997                               Register rd,
   1998                               const MemOperand& operand) {
   1999   CONTEXT_SCOPE;
   2000   VIXL_ASSERT(size.IsBest());
   2001   VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
   2002               (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
   2003               (type == kStrb) || (type == kStrh));
   2004   if (operand.IsImmediate()) {
   2005     const Register& rn = operand.GetBaseRegister();
   2006     AddrMode addrmode = operand.GetAddrMode();
   2007     int32_t offset = operand.GetOffsetImmediate();
   2008     uint32_t extra_offset_mask = GetOffsetMask(type, addrmode);
   2009     // Try to maximize the offset used by the MemOperand (load_store_offset).
   2010     // Add the part which can't be used by the MemOperand (add_offset).
   2011     uint32_t load_store_offset = offset & extra_offset_mask;
   2012     uint32_t add_offset = offset & ~extra_offset_mask;
   2013     if ((add_offset != 0) &&
   2014         (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
   2015       load_store_offset = 0;
   2016       add_offset = offset;
   2017     }
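    // For example (sketch, assuming a 12-bit offset field):
    // ldr r0, [r1, #0x12345] uses load_store_offset = 0x345 and
    // add_offset = 0x12000.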
   2018     switch (addrmode) {
   2019       case PreIndex:
   2020         // Avoid the unpredictable case 'str r0, [r0, imm]!'
   2021         if (!rn.Is(rd)) {
   2022           // Pre-Indexed case:
   2023           // ldr r0, [r1, 12345]! will translate into
   2024           //   add r1, r1, 12345
   2025           //   ldr r0, [r1]
   2026           {
   2027             CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2028             add(cond, rn, rn, add_offset);
   2029           }
   2030           {
   2031             CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2032             (this->*instruction)(cond,
   2033                                  size,
   2034                                  rd,
   2035                                  MemOperand(rn, load_store_offset, PreIndex));
   2036           }
   2037           return;
   2038         }
   2039         break;
   2040       case Offset: {
   2041         UseScratchRegisterScope temps(this);
   2042         // Allow using the destination as a scratch register if possible.
   2043         if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
   2044             !rd.Is(rn)) {
   2045           temps.Include(rd);
   2046         }
   2047         Register scratch = temps.Acquire();
   2048         // Offset case:
   2049         // ldr r0, [r1, 12345] will translate into
   2050         //   add r0, r1, 12345
   2051         //   ldr r0, [r0]
   2052         {
   2053           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2054           add(cond, scratch, rn, add_offset);
   2055         }
   2056         {
   2057           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2058           (this->*instruction)(cond,
   2059                                size,
   2060                                rd,
   2061                                MemOperand(scratch, load_store_offset));
   2062         }
   2063         return;
   2064       }
   2065       case PostIndex:
   2066         // Avoid the unpredictable case 'ldr r0, [r0], imm'
   2067         if (!rn.Is(rd)) {
   2068           // Post-indexed case:
          // ldr r0, [r1], imm32 will translate into
          //   ldr r0, [r1]
          //   movw ip, imm32 & 0xffff
   2072           //   movt ip, imm32 >> 16
   2073           //   add r1, r1, ip
   2074           {
   2075             CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2076             (this->*instruction)(cond,
   2077                                  size,
   2078                                  rd,
   2079                                  MemOperand(rn, load_store_offset, PostIndex));
   2080           }
   2081           {
   2082             CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2083             add(cond, rn, rn, add_offset);
   2084           }
   2085           return;
   2086         }
   2087         break;
   2088     }
   2089   } else if (operand.IsPlainRegister()) {
   2090     const Register& rn = operand.GetBaseRegister();
   2091     AddrMode addrmode = operand.GetAddrMode();
   2092     const Register& rm = operand.GetOffsetRegister();
   2093     if (rm.IsPC()) {
   2094       VIXL_ABORT_WITH_MSG(
   2095           "The MacroAssembler does not convert loads and stores with a PC "
   2096           "offset register.\n");
   2097     }
   2098     if (rn.IsPC()) {
   2099       if (addrmode == Offset) {
   2100         if (IsUsingT32()) {
   2101           VIXL_ABORT_WITH_MSG(
   2102               "The MacroAssembler does not convert loads and stores with a PC "
   2103               "base register for T32.\n");
   2104         }
   2105       } else {
   2106         VIXL_ABORT_WITH_MSG(
   2107             "The MacroAssembler does not convert loads and stores with a PC "
   2108             "base register in pre-index or post-index mode.\n");
   2109       }
   2110     }
   2111     switch (addrmode) {
   2112       case PreIndex:
   2113         // Avoid the unpredictable case 'str r0, [r0, imm]!'
   2114         if (!rn.Is(rd)) {
   2115           // Pre-Indexed case:
   2116           // ldr r0, [r1, r2]! will translate into
   2117           //   add r1, r1, r2
   2118           //   ldr r0, [r1]
   2119           {
   2120             CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2121             if (operand.GetSign().IsPlus()) {
   2122               add(cond, rn, rn, rm);
   2123             } else {
   2124               sub(cond, rn, rn, rm);
   2125             }
   2126           }
   2127           {
   2128             CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2129             (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
   2130           }
   2131           return;
   2132         }
   2133         break;
   2134       case Offset: {
   2135         UseScratchRegisterScope temps(this);
   2136         // Allow using the destination as a scratch register if this is not a
   2137         // store.
   2138         // Avoid using PC as a temporary as this has side-effects.
   2139         if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
   2140             !rd.IsPC()) {
   2141           temps.Include(rd);
   2142         }
   2143         Register scratch = temps.Acquire();
   2144         // Offset case:
   2145         // ldr r0, [r1, r2] will translate into
   2146         //   add r0, r1, r2
   2147         //   ldr r0, [r0]
   2148         {
   2149           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2150           if (operand.GetSign().IsPlus()) {
   2151             add(cond, scratch, rn, rm);
   2152           } else {
   2153             sub(cond, scratch, rn, rm);
   2154           }
   2155         }
   2156         {
   2157           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2158           (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
   2159         }
   2160         return;
   2161       }
   2162       case PostIndex:
   2163         // Avoid the unpredictable case 'ldr r0, [r0], imm'
   2164         if (!rn.Is(rd)) {
   2165           // Post-indexed case:
          // ldr r0, [r1], r2 will translate into
   2167           //   ldr r0, [r1]
   2168           //   add r1, r1, r2
   2169           {
   2170             CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2171             (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
   2172           }
   2173           {
   2174             CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2175             if (operand.GetSign().IsPlus()) {
   2176               add(cond, rn, rn, rm);
   2177             } else {
   2178               sub(cond, rn, rn, rm);
   2179             }
   2180           }
   2181           return;
   2182         }
   2183         break;
   2184     }
   2185   }
   2186   Assembler::Delegate(type, instruction, cond, size, rd, operand);
   2187 }
   2188 
   2189 
   2190 void MacroAssembler::Delegate(InstructionType type,
   2191                               InstructionCondRRMop instruction,
   2192                               Condition cond,
   2193                               Register rt,
   2194                               Register rt2,
   2195                               const MemOperand& operand) {
   2196   if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
   2197       (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
   2198       (type == kStrexb) || (type == kStrexh)) {
   2199     UnimplementedDelegate(type);
   2200     return;
   2201   }
   2202 
   2203   VIXL_ASSERT((type == kLdrd) || (type == kStrd));
   2204 
   2205   CONTEXT_SCOPE;
   2206 
   2207   // TODO: Should we allow these cases?
   2208   if (IsUsingA32()) {
   2209     // The first register needs to be even.
   2210     if ((rt.GetCode() & 1) != 0) {
   2211       UnimplementedDelegate(type);
   2212       return;
   2213     }
   2214     // Registers need to be adjacent.
   2215     if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
   2216       UnimplementedDelegate(type);
   2217       return;
   2218     }
   2219     // LDRD lr, pc [...] is not allowed.
   2220     if (rt.Is(lr)) {
   2221       UnimplementedDelegate(type);
   2222       return;
   2223     }
   2224   }
   2225 
   2226   if (operand.IsImmediate()) {
   2227     const Register& rn = operand.GetBaseRegister();
   2228     AddrMode addrmode = operand.GetAddrMode();
   2229     int32_t offset = operand.GetOffsetImmediate();
   2230     uint32_t extra_offset_mask = GetOffsetMask(type, addrmode);
   2231     // Try to maximize the offset used by the MemOperand (load_store_offset).
   2232     // Add the part which can't be used by the MemOperand (add_offset).
   2233     uint32_t load_store_offset = offset & extra_offset_mask;
   2234     uint32_t add_offset = offset & ~extra_offset_mask;
   2235     if ((add_offset != 0) &&
   2236         (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
   2237       load_store_offset = 0;
   2238       add_offset = offset;
   2239     }
   2240     switch (addrmode) {
   2241       case PreIndex: {
        // Allow using the destinations as scratch registers if possible.
   2243         UseScratchRegisterScope temps(this);
   2244         if (type == kLdrd) {
   2245           if (!rt.Is(rn)) temps.Include(rt);
   2246           if (!rt2.Is(rn)) temps.Include(rt2);
   2247         }
   2248 
   2249         // Pre-Indexed case:
   2250         // ldrd r0, r1, [r2, 12345]! will translate into
   2251         //   add r2, 12345
   2252         //   ldrd r0, r1, [r2]
   2253         {
   2254           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2255           add(cond, rn, rn, add_offset);
   2256         }
   2257         {
   2258           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2259           (this->*instruction)(cond,
   2260                                rt,
   2261                                rt2,
   2262                                MemOperand(rn, load_store_offset, PreIndex));
   2263         }
   2264         return;
   2265       }
   2266       case Offset: {
   2267         UseScratchRegisterScope temps(this);
        // Allow using the destinations as scratch registers if possible.
   2269         if (type == kLdrd) {
   2270           if (!rt.Is(rn)) temps.Include(rt);
   2271           if (!rt2.Is(rn)) temps.Include(rt2);
   2272         }
   2273         Register scratch = temps.Acquire();
   2274         // Offset case:
   2275         // ldrd r0, r1, [r2, 12345] will translate into
   2276         //   add r0, r2, 12345
   2277         //   ldrd r0, r1, [r0]
   2278         {
   2279           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2280           add(cond, scratch, rn, add_offset);
   2281         }
   2282         {
   2283           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2284           (this->*instruction)(cond,
   2285                                rt,
   2286                                rt2,
   2287                                MemOperand(scratch, load_store_offset));
   2288         }
   2289         return;
   2290       }
   2291       case PostIndex:
   2292         // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
   2293         if (!rn.Is(rt) && !rn.Is(rt2)) {
   2294           // Post-indexed case:
   2295           // ldrd r0, r1, [r2], imm32 will translate into
   2296           //   ldrd r0, r1, [r2]
          //   movw ip, imm32 & 0xffff
   2298           //   movt ip, imm32 >> 16
   2299           //   add r2, ip
   2300           {
   2301             CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2302             (this->*instruction)(cond,
   2303                                  rt,
   2304                                  rt2,
   2305                                  MemOperand(rn, load_store_offset, PostIndex));
   2306           }
   2307           {
   2308             CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2309             add(cond, rn, rn, add_offset);
   2310           }
   2311           return;
   2312         }
   2313         break;
   2314     }
   2315   }
   2316   if (operand.IsPlainRegister()) {
   2317     const Register& rn = operand.GetBaseRegister();
   2318     const Register& rm = operand.GetOffsetRegister();
   2319     AddrMode addrmode = operand.GetAddrMode();
   2320     switch (addrmode) {
   2321       case PreIndex:
   2322         // ldrd r0, r1, [r2, r3]! will translate into
   2323         //   add r2, r3
   2324         //   ldrd r0, r1, [r2]
   2325         {
   2326           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2327           if (operand.GetSign().IsPlus()) {
   2328             add(cond, rn, rn, rm);
   2329           } else {
   2330             sub(cond, rn, rn, rm);
   2331           }
   2332         }
   2333         {
   2334           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2335           (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
   2336         }
   2337         return;
   2338       case PostIndex:
   2339         // ldrd r0, r1, [r2], r3 will translate into
   2340         //   ldrd r0, r1, [r2]
   2341         //   add r2, r3
   2342         {
   2343           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2344           (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
   2345         }
   2346         {
   2347           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2348           if (operand.GetSign().IsPlus()) {
   2349             add(cond, rn, rn, rm);
   2350           } else {
   2351             sub(cond, rn, rn, rm);
   2352           }
   2353         }
   2354         return;
   2355       case Offset: {
   2356         UseScratchRegisterScope temps(this);
        // Allow using the destinations as scratch registers if possible.
   2358         if (type == kLdrd) {
   2359           if (!rt.Is(rn)) temps.Include(rt);
   2360           if (!rt2.Is(rn)) temps.Include(rt2);
   2361         }
   2362         Register scratch = temps.Acquire();
   2363         // Offset case:
   2364         // ldrd r0, r1, [r2, r3] will translate into
   2365         //   add r0, r2, r3
   2366         //   ldrd r0, r1, [r0]
   2367         {
   2368           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2369           if (operand.GetSign().IsPlus()) {
   2370             add(cond, scratch, rn, rm);
   2371           } else {
   2372             sub(cond, scratch, rn, rm);
   2373           }
   2374         }
   2375         {
   2376           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2377           (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
   2378         }
   2379         return;
   2380       }
   2381     }
   2382   }
   2383   Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
   2384 }
   2385 
   2386 
   2387 void MacroAssembler::Delegate(InstructionType type,
   2388                               InstructionCondDtSMop instruction,
   2389                               Condition cond,
   2390                               DataType dt,
   2391                               SRegister rd,
   2392                               const MemOperand& operand) {
   2393   CONTEXT_SCOPE;
   2394   if (operand.IsImmediate()) {
   2395     const Register& rn = operand.GetBaseRegister();
   2396     AddrMode addrmode = operand.GetAddrMode();
   2397     int32_t offset = operand.GetOffsetImmediate();
   2398     VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
   2399                 ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
   2400     if (rn.IsPC()) {
   2401       VIXL_ABORT_WITH_MSG(
   2402           "The MacroAssembler does not convert vldr or vstr with a PC base "
   2403           "register.\n");
   2404     }
   2405     switch (addrmode) {
   2406       case PreIndex:
   2407         // Pre-Indexed case:
   2408         // vldr.32 s0, [r1, 12345]! will translate into
   2409         //   add r1, 12345
   2410         //   vldr.32 s0, [r1]
   2411         if (offset != 0) {
   2412           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2413           add(cond, rn, rn, offset);
   2414         }
   2415         {
   2416           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2417           (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
   2418         }
   2419         return;
   2420       case Offset: {
   2421         UseScratchRegisterScope temps(this);
   2422         Register scratch = temps.Acquire();
   2423         // Offset case:
   2424         // vldr.32 s0, [r1, 12345] will translate into
   2425         //   add ip, r1, 12345
   2426         //   vldr.32 s0, [ip]
   2427         {
   2428           VIXL_ASSERT(offset != 0);
   2429           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2430           add(cond, scratch, rn, offset);
   2431         }
   2432         {
   2433           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2434           (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
   2435         }
   2436         return;
   2437       }
   2438       case PostIndex:
   2439         // Post-indexed case:
   2440         // vldr.32 s0, [r1], imm32 will translate into
   2441         //   vldr.32 s0, [r1]
        //   movw ip, imm32 & 0xffff
   2443         //   movt ip, imm32 >> 16
   2444         //   add r1, ip
   2445         {
   2446           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2447           (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
   2448         }
   2449         if (offset != 0) {
   2450           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2451           add(cond, rn, rn, offset);
   2452         }
   2453         return;
   2454     }
   2455   }
   2456   Assembler::Delegate(type, instruction, cond, dt, rd, operand);
   2457 }
   2458 
   2459 
   2460 void MacroAssembler::Delegate(InstructionType type,
   2461                               InstructionCondDtDMop instruction,
   2462                               Condition cond,
   2463                               DataType dt,
   2464                               DRegister rd,
   2465                               const MemOperand& operand) {
   2466   CONTEXT_SCOPE;
   2467   if (operand.IsImmediate()) {
   2468     const Register& rn = operand.GetBaseRegister();
   2469     AddrMode addrmode = operand.GetAddrMode();
   2470     int32_t offset = operand.GetOffsetImmediate();
   2471     VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
   2472                 ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
   2473     if (rn.IsPC()) {
   2474       VIXL_ABORT_WITH_MSG(
   2475           "The MacroAssembler does not convert vldr or vstr with a PC base "
   2476           "register.\n");
   2477     }
   2478     switch (addrmode) {
   2479       case PreIndex:
   2480         // Pre-Indexed case:
   2481         // vldr.64 d0, [r1, 12345]! will translate into
   2482         //   add r1, 12345
   2483         //   vldr.64 d0, [r1]
   2484         if (offset != 0) {
   2485           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2486           add(cond, rn, rn, offset);
   2487         }
   2488         {
   2489           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2490           (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
   2491         }
   2492         return;
   2493       case Offset: {
   2494         UseScratchRegisterScope temps(this);
   2495         Register scratch = temps.Acquire();
   2496         // Offset case:
   2497         // vldr.64 d0, [r1, 12345] will translate into
   2498         //   add ip, r1, 12345
        //   vldr.64 d0, [ip]
   2500         {
   2501           VIXL_ASSERT(offset != 0);
   2502           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2503           add(cond, scratch, rn, offset);
   2504         }
   2505         {
   2506           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2507           (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
   2508         }
   2509         return;
   2510       }
   2511       case PostIndex:
   2512         // Post-indexed case:
        // vldr.64 d0, [r1], imm32 will translate into
        //   vldr.64 d0, [r1]
        //   movw ip, imm32 & 0xffff
   2516         //   movt ip, imm32 >> 16
   2517         //   add r1, ip
   2518         {
   2519           CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2520           (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
   2521         }
   2522         if (offset != 0) {
   2523           CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
   2524           add(cond, rn, rn, offset);
   2525         }
   2526         return;
   2527     }
   2528   }
   2529   Assembler::Delegate(type, instruction, cond, dt, rd, operand);
   2530 }
   2531 
   2532 
   2533 void MacroAssembler::Delegate(InstructionType type,
   2534                               InstructionCondMsrOp instruction,
   2535                               Condition cond,
   2536                               MaskedSpecialRegister spec_reg,
   2537                               const Operand& operand) {
   2538   USE(type);
   2539   VIXL_ASSERT(type == kMsr);
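  // The immediate could not be encoded by msr directly, so materialize it in
  // a scratch register first. Sketch:
  //   Msr(spec_reg, imm)  ->  mov ip, #imm
  //                           msr spec_reg, ip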
   2540   if (operand.IsImmediate()) {
   2541     UseScratchRegisterScope temps(this);
   2542     Register scratch = temps.Acquire();
   2543     {
   2544       CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
   2545       mov(cond, scratch, operand);
   2546     }
   2547     CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
   2548     msr(cond, spec_reg, scratch);
   2549     return;
   2550   }
   2551   Assembler::Delegate(type, instruction, cond, spec_reg, operand);
   2552 }
   2553 
   2554 
   2555 void MacroAssembler::Delegate(InstructionType type,
   2556                               InstructionCondDtDL instruction,
   2557                               Condition cond,
   2558                               DataType dt,
   2559                               DRegister rd,
   2560                               Label* label) {
   2561   VIXL_ASSERT(type == kVldr);
   2562 
   2563   CONTEXT_SCOPE;
   2564 
   2565   if (label->IsBound()) {
   2566     CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
   2567     UseScratchRegisterScope temps(this);
   2568     Register scratch = temps.Acquire();
   2569     uint32_t mask = GetOffsetMask(type, Offset);
   2570     vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
   2571     return;
   2572   }
   2573 
   2574   Assembler::Delegate(type, instruction, cond, dt, rd, label);
   2575 }
   2576 
   2577 
   2578 void MacroAssembler::Delegate(InstructionType type,
   2579                               InstructionCondDtSL instruction,
   2580                               Condition cond,
   2581                               DataType dt,
   2582                               SRegister rd,
   2583                               Label* label) {
   2584   VIXL_ASSERT(type == kVldr);
   2585 
   2586   CONTEXT_SCOPE;
   2587 
   2588   if (label->IsBound()) {
   2589     CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
   2590     UseScratchRegisterScope temps(this);
   2591     Register scratch = temps.Acquire();
   2592     uint32_t mask = GetOffsetMask(type, Offset);
   2593     vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
   2594     return;
   2595   }
   2596 
   2597   Assembler::Delegate(type, instruction, cond, dt, rd, label);
   2598 }
   2599 
   2600 
   2601 #undef CONTEXT_SCOPE
   2602 #undef TOSTRING
   2603 #undef STRINGIFY
   2604 
   2605 // Start of generated code.
   2606 // End of generated code.
   2607 }  // namespace aarch32
   2608 }  // namespace vixl
   2609