      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_arm64.h"
     18 
     19 #include "arch/arm64/asm_support_arm64.h"
     20 #include "arch/arm64/instruction_set_features_arm64.h"
     21 #include "art_method.h"
     22 #include "base/bit_utils.h"
     23 #include "base/bit_utils_iterator.h"
     24 #include "code_generator_utils.h"
     25 #include "compiled_method.h"
     26 #include "entrypoints/quick/quick_entrypoints.h"
     27 #include "entrypoints/quick/quick_entrypoints_enum.h"
     28 #include "gc/accounting/card_table.h"
     29 #include "intrinsics.h"
     30 #include "intrinsics_arm64.h"
     31 #include "linker/arm64/relative_patcher_arm64.h"
     32 #include "mirror/array-inl.h"
     33 #include "mirror/class-inl.h"
     34 #include "lock_word.h"
     35 #include "offsets.h"
     36 #include "thread.h"
     37 #include "utils/arm64/assembler_arm64.h"
     38 #include "utils/assembler.h"
     39 #include "utils/stack_checks.h"
     40 
     41 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
     42 using vixl::ExactAssemblyScope;
     43 using vixl::CodeBufferCheckScope;
     44 using vixl::EmissionCheckScope;
     45 
     46 #ifdef __
     47 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
     48 #endif
     49 
     50 namespace art {
     51 
     52 template<class MirrorType>
     53 class GcRoot;
     54 
     55 namespace arm64 {
     56 
     57 using helpers::ARM64EncodableConstantOrRegister;
     58 using helpers::ArtVixlRegCodeCoherentForRegSet;
     59 using helpers::CPURegisterFrom;
     60 using helpers::DRegisterFrom;
     61 using helpers::FPRegisterFrom;
     62 using helpers::HeapOperand;
     63 using helpers::HeapOperandFrom;
     64 using helpers::InputCPURegisterAt;
     65 using helpers::InputCPURegisterOrZeroRegAt;
     66 using helpers::InputFPRegisterAt;
     67 using helpers::InputOperandAt;
     68 using helpers::InputRegisterAt;
     69 using helpers::Int64ConstantFrom;
     70 using helpers::IsConstantZeroBitPattern;
     71 using helpers::LocationFrom;
     72 using helpers::OperandFromMemOperand;
     73 using helpers::OutputCPURegister;
     74 using helpers::OutputFPRegister;
     75 using helpers::OutputRegister;
     76 using helpers::QRegisterFrom;
     77 using helpers::RegisterFrom;
     78 using helpers::StackOperandFrom;
     79 using helpers::VIXLRegCodeFromART;
     80 using helpers::WRegisterFrom;
     81 using helpers::XRegisterFrom;
     82 
      83 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
      84 // jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
      85 // generates less code/data for a small num_entries.
     86 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
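         // Rough arithmetic behind the threshold: at 7 entries the compare/jump sequence costs about
         // 1.5 * 7 + 3 = ~13 instructions, while the jump table costs 7 instructions plus 7 32-bit
         // literals, i.e. roughly 14 words of code/data, so 7 is approximately the break-even point.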
     87 
      88 // A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
      89 // offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
      90 // For the Baker read barrier implementation using link-time generated thunks we need to split
      91 // the offset explicitly.
     92 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
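         // Background for the 16KiB limit: the unsigned-offset form of a 32-bit LDR encodes a 12-bit
         // immediate scaled by the access size (4 bytes), so it can only address offsets 0..16380;
         // offsets at or above 16KiB therefore need part of the offset to be materialized separately.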
     93 
     94 // Flags controlling the use of link-time generated thunks for Baker read barriers.
     95 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
     96 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
     97 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
     98 
      99 // Some instructions have special requirements for a temporary. For example,
     100 // LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require a
     101 // temp that's not R0 (to avoid an extra move), and Baker read barrier field
     102 // loads with large offsets need a fixed register to limit the number of link-time
     103 // thunks we generate. For these and similar cases, we want to reserve a specific
     104 // register that's neither callee-save nor an argument register. We choose x15.
    105 inline Location FixedTempLocation() {
    106   return Location::RegisterLocation(x15.GetCode());
    107 }
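         // Call sites typically reserve this register explicitly, e.g. (illustrative only)
         // `locations->AddTemp(FixedTempLocation());` when setting up the locations of the
         // instructions mentioned above.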
    108 
    109 inline Condition ARM64Condition(IfCondition cond) {
    110   switch (cond) {
    111     case kCondEQ: return eq;
    112     case kCondNE: return ne;
    113     case kCondLT: return lt;
    114     case kCondLE: return le;
    115     case kCondGT: return gt;
    116     case kCondGE: return ge;
    117     case kCondB:  return lo;
    118     case kCondBE: return ls;
    119     case kCondA:  return hi;
    120     case kCondAE: return hs;
    121   }
    122   LOG(FATAL) << "Unreachable";
    123   UNREACHABLE();
    124 }
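         // Note: kCondB/kCondBE/kCondA/kCondAE are the unsigned comparisons, hence the mapping to the
         // AArch64 unsigned conditions lo/ls/hi/hs rather than lt/le/gt/ge.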
    125 
    126 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
    127   // The ARM64 condition codes can express all the necessary branches, see the
    128   // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
    129   // There is no dex instruction or HIR that would need the missing conditions
    130   // "equal or unordered" or "not equal".
    131   switch (cond) {
    132     case kCondEQ: return eq;
    133     case kCondNE: return ne /* unordered */;
    134     case kCondLT: return gt_bias ? cc : lt /* unordered */;
    135     case kCondLE: return gt_bias ? ls : le /* unordered */;
    136     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    137     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    138     default:
    139       LOG(FATAL) << "UNREACHABLE";
    140       UNREACHABLE();
    141   }
    142 }
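         // Worked example of the bias handling above: for kCondLT with gt_bias, an unordered FCMP
         // result (a NaN operand) sets the C flag, so the returned condition `cc` (C clear) is false and
         // the "less than" branch is not taken; a plain `lt` would treat unordered as true. This matches
         // the dex cmpg semantics that the gt_bias flag encodes.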
    143 
    144 Location ARM64ReturnLocation(Primitive::Type return_type) {
    145   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
    146   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
    147   // but we use the exact registers for clarity.
    148   if (return_type == Primitive::kPrimFloat) {
    149     return LocationFrom(s0);
    150   } else if (return_type == Primitive::kPrimDouble) {
    151     return LocationFrom(d0);
    152   } else if (return_type == Primitive::kPrimLong) {
    153     return LocationFrom(x0);
    154   } else if (return_type == Primitive::kPrimVoid) {
    155     return Location::NoLocation();
    156   } else {
    157     return LocationFrom(w0);
    158   }
    159 }
    160 
    161 Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) {
    162   return ARM64ReturnLocation(return_type);
    163 }
    164 
    165 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
    166 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
    167 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
    168 
     169 // Calculate the memory operands used to save/restore live registers.
    170 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
    171                                            LocationSummary* locations,
    172                                            int64_t spill_offset,
    173                                            bool is_save) {
    174   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
    175   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
    176   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
    177                                          codegen->GetNumberOfCoreRegisters(),
    178                                          fp_spills,
    179                                          codegen->GetNumberOfFloatingPointRegisters()));
    180 
    181   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
    182   unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
    183   CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
    184 
    185   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
    186   UseScratchRegisterScope temps(masm);
    187 
    188   Register base = masm->StackPointer();
    189   int64_t core_spill_size = core_list.GetTotalSizeInBytes();
    190   int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
    191   int64_t reg_size = kXRegSizeInBytes;
    192   int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
    193   uint32_t ls_access_size = WhichPowerOf2(reg_size);
    194   if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
    195       !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    196     // If the offset does not fit in the instruction's immediate field, use an alternate register
     197     // to compute the base address (the floating-point registers' spill base address).
    198     Register new_base = temps.AcquireSameSizeAs(base);
    199     __ Add(new_base, base, Operand(spill_offset + core_spill_size));
    200     base = new_base;
    201     spill_offset = -core_spill_size;
    202     int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    203     DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    204     DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
    205   }
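           // Illustrative numbers only: STP/LDP of X registers encode a signed 7-bit immediate scaled
           // by 8, i.e. offsets in [-512, 504] bytes. With, say, spill_offset = 768 a pair access could
           // not be encoded directly, so the code above rebases onto `new_base` and uses small offsets
           // relative to it instead.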
    206 
    207   if (is_save) {
    208     __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
    209     __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
    210   } else {
    211     __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
    212     __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
    213   }
    214 }
    215 
    216 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
    217   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
    218   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
    219   for (uint32_t i : LowToHighBits(core_spills)) {
    220     // If the register holds an object, update the stack mask.
    221     if (locations->RegisterContainsObject(i)) {
    222       locations->SetStackBit(stack_offset / kVRegSize);
    223     }
    224     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    225     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    226     saved_core_stack_offsets_[i] = stack_offset;
    227     stack_offset += kXRegSizeInBytes;
    228   }
    229 
    230   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
    231   for (uint32_t i : LowToHighBits(fp_spills)) {
    232     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    233     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    234     saved_fpu_stack_offsets_[i] = stack_offset;
    235     stack_offset += kDRegSizeInBytes;
    236   }
    237 
    238   SaveRestoreLiveRegistersHelper(codegen,
    239                                  locations,
    240                                  codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
    241 }
    242 
    243 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
    244   SaveRestoreLiveRegistersHelper(codegen,
    245                                  locations,
    246                                  codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
    247 }
    248 
    249 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
    250  public:
    251   explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
    252 
    253   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    254     LocationSummary* locations = instruction_->GetLocations();
    255     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    256 
    257     __ Bind(GetEntryLabel());
    258     if (instruction_->CanThrowIntoCatchBlock()) {
    259       // Live registers will be restored in the catch block if caught.
    260       SaveLiveRegisters(codegen, instruction_->GetLocations());
    261     }
    262     // We're moving two locations to locations that could overlap, so we need a parallel
    263     // move resolver.
    264     InvokeRuntimeCallingConvention calling_convention;
    265     codegen->EmitParallelMoves(
    266         locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
    267         locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
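             // For instance, if the index is currently in the register that the calling convention
             // assigns to the length (or vice versa), issuing the two moves independently would clobber
             // one input; the parallel move resolver orders them (using a temp to break cycles) so that
             // both values survive.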
    268     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
    269         ? kQuickThrowStringBounds
    270         : kQuickThrowArrayBounds;
    271     arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    272     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    273     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
    274   }
    275 
    276   bool IsFatal() const OVERRIDE { return true; }
    277 
    278   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }
    279 
    280  private:
    281   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
    282 };
    283 
    284 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
    285  public:
    286   explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
    287 
    288   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    289     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    290     __ Bind(GetEntryLabel());
    291     arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    292     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
    293   }
    294 
    295   bool IsFatal() const OVERRIDE { return true; }
    296 
    297   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }
    298 
    299  private:
    300   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
    301 };
    302 
    303 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
    304  public:
    305   LoadClassSlowPathARM64(HLoadClass* cls,
    306                          HInstruction* at,
    307                          uint32_t dex_pc,
    308                          bool do_clinit,
    309                          vixl::aarch64::Register bss_entry_temp = vixl::aarch64::Register(),
    310                          vixl::aarch64::Label* bss_entry_adrp_label = nullptr)
    311       : SlowPathCodeARM64(at),
    312         cls_(cls),
    313         dex_pc_(dex_pc),
    314         do_clinit_(do_clinit),
    315         bss_entry_temp_(bss_entry_temp),
    316         bss_entry_adrp_label_(bss_entry_adrp_label) {
    317     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    318   }
    319 
    320   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    321     LocationSummary* locations = instruction_->GetLocations();
    322     Location out = locations->Out();
    323     constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
    324     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    325 
    326     InvokeRuntimeCallingConvention calling_convention;
    327     // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp
     328     // register; make sure it's not clobbered by the call or by saving/restoring registers.
    329     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
    330     bool is_load_class_bss_entry =
    331         (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
    332     if (is_load_class_bss_entry) {
    333       DCHECK(bss_entry_temp_.IsValid());
    334       DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0)));
    335       DCHECK(
    336           !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_));
    337     }
    338 
    339     __ Bind(GetEntryLabel());
    340     SaveLiveRegisters(codegen, locations);
    341 
    342     dex::TypeIndex type_index = cls_->GetTypeIndex();
    343     __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
    344     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
    345                                                 : kQuickInitializeType;
    346     arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
    347     if (do_clinit_) {
    348       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    349     } else {
    350       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    351     }
    352 
    353     // Move the class to the desired location.
    354     if (out.IsValid()) {
    355       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
    356       Primitive::Type type = instruction_->GetType();
    357       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
    358     }
    359     RestoreLiveRegisters(codegen, locations);
    360     // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
    361     if (is_load_class_bss_entry) {
    362       DCHECK(out.IsValid());
    363       const DexFile& dex_file = cls_->GetDexFile();
    364       if (call_saves_everything_except_r0_ip0) {
    365         // The class entry page address was preserved in bss_entry_temp_ thanks to kSaveEverything.
    366       } else {
    367         // For non-Baker read barrier, we need to re-calculate the address of the class entry page.
    368         bss_entry_adrp_label_ = arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
    369         arm64_codegen->EmitAdrpPlaceholder(bss_entry_adrp_label_, bss_entry_temp_);
    370       }
    371       vixl::aarch64::Label* strp_label =
    372           arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label_);
    373       {
    374         SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
    375         __ Bind(strp_label);
    376         __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
    377                MemOperand(bss_entry_temp_, /* offset placeholder */ 0));
    378       }
    379     }
    380     __ B(GetExitLabel());
    381   }
    382 
    383   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }
    384 
    385  private:
    386   // The class this slow path will load.
    387   HLoadClass* const cls_;
    388 
     389   // The dex PC of `at`.
    390   const uint32_t dex_pc_;
    391 
    392   // Whether to initialize the class.
    393   const bool do_clinit_;
    394 
    395   // For HLoadClass/kBssEntry, the temp register and the label of the ADRP where it was loaded.
    396   vixl::aarch64::Register bss_entry_temp_;
    397   vixl::aarch64::Label* bss_entry_adrp_label_;
    398 
    399   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
    400 };
    401 
    402 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
    403  public:
    404   LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
    405       : SlowPathCodeARM64(instruction),
    406         temp_(temp),
    407         adrp_label_(adrp_label) {}
    408 
    409   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    410     LocationSummary* locations = instruction_->GetLocations();
    411     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    412     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    413 
    414     InvokeRuntimeCallingConvention calling_convention;
    415     // Make sure `temp_` is not clobbered by the call or by saving/restoring registers.
    416     DCHECK(temp_.IsValid());
    417     DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0)));
    418     DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_));
    419 
    420     __ Bind(GetEntryLabel());
    421     SaveLiveRegisters(codegen, locations);
    422 
    423     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    424     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
    425     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    426     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    427     Primitive::Type type = instruction_->GetType();
    428     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
    429 
    430     RestoreLiveRegisters(codegen, locations);
    431 
    432     // Store the resolved String to the BSS entry.
    433     const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
    434     if (!kUseReadBarrier || kUseBakerReadBarrier) {
    435       // The string entry page address was preserved in temp_ thanks to kSaveEverything.
    436     } else {
    437       // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
    438       adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
    439       arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
    440     }
    441     vixl::aarch64::Label* strp_label =
    442         arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
    443     {
    444       SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
    445       __ Bind(strp_label);
    446       __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
    447              MemOperand(temp_, /* offset placeholder */ 0));
    448     }
    449 
    450     __ B(GetExitLabel());
    451   }
    452 
    453   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
    454 
    455  private:
    456   const Register temp_;
    457   vixl::aarch64::Label* adrp_label_;
    458 
    459   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
    460 };
    461 
    462 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
    463  public:
    464   explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
    465 
    466   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    467     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    468     __ Bind(GetEntryLabel());
    469     if (instruction_->CanThrowIntoCatchBlock()) {
    470       // Live registers will be restored in the catch block if caught.
    471       SaveLiveRegisters(codegen, instruction_->GetLocations());
    472     }
    473     arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
    474                                  instruction_,
    475                                  instruction_->GetDexPc(),
    476                                  this);
    477     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
    478   }
    479 
    480   bool IsFatal() const OVERRIDE { return true; }
    481 
    482   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }
    483 
    484  private:
    485   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
    486 };
    487 
    488 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
    489  public:
    490   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
    491       : SlowPathCodeARM64(instruction), successor_(successor) {}
    492 
    493   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    494     LocationSummary* locations = instruction_->GetLocations();
    495     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    496     __ Bind(GetEntryLabel());
    497     SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
    498     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    499     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    500     RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
    501     if (successor_ == nullptr) {
    502       __ B(GetReturnLabel());
    503     } else {
    504       __ B(arm64_codegen->GetLabelOf(successor_));
    505     }
    506   }
    507 
    508   vixl::aarch64::Label* GetReturnLabel() {
    509     DCHECK(successor_ == nullptr);
    510     return &return_label_;
    511   }
    512 
    513   HBasicBlock* GetSuccessor() const {
    514     return successor_;
    515   }
    516 
    517   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }
    518 
    519  private:
    520   // If not null, the block to branch to after the suspend check.
    521   HBasicBlock* const successor_;
    522 
    523   // If `successor_` is null, the label to branch to after the suspend check.
    524   vixl::aarch64::Label return_label_;
    525 
    526   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
    527 };
    528 
    529 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
    530  public:
    531   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
    532       : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
    533 
    534   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    535     LocationSummary* locations = instruction_->GetLocations();
    536 
    537     DCHECK(instruction_->IsCheckCast()
    538            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    539     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    540     uint32_t dex_pc = instruction_->GetDexPc();
    541 
    542     __ Bind(GetEntryLabel());
    543 
    544     if (!is_fatal_) {
    545       SaveLiveRegisters(codegen, locations);
    546     }
    547 
    548     // We're moving two locations to locations that could overlap, so we need a parallel
    549     // move resolver.
    550     InvokeRuntimeCallingConvention calling_convention;
    551     codegen->EmitParallelMoves(locations->InAt(0),
    552                                LocationFrom(calling_convention.GetRegisterAt(0)),
    553                                Primitive::kPrimNot,
    554                                locations->InAt(1),
    555                                LocationFrom(calling_convention.GetRegisterAt(1)),
    556                                Primitive::kPrimNot);
    557     if (instruction_->IsInstanceOf()) {
    558       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
    559       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    560       Primitive::Type ret_type = instruction_->GetType();
    561       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
    562       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
    563     } else {
    564       DCHECK(instruction_->IsCheckCast());
    565       arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
    566       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    567     }
    568 
    569     if (!is_fatal_) {
    570       RestoreLiveRegisters(codegen, locations);
    571       __ B(GetExitLabel());
    572     }
    573   }
    574 
    575   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
    576   bool IsFatal() const OVERRIDE { return is_fatal_; }
    577 
    578  private:
    579   const bool is_fatal_;
    580 
    581   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
    582 };
    583 
    584 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
    585  public:
    586   explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
    587       : SlowPathCodeARM64(instruction) {}
    588 
    589   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    590     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    591     __ Bind(GetEntryLabel());
    592     LocationSummary* locations = instruction_->GetLocations();
    593     SaveLiveRegisters(codegen, locations);
    594     InvokeRuntimeCallingConvention calling_convention;
    595     __ Mov(calling_convention.GetRegisterAt(0),
    596            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    597     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    598     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
    599   }
    600 
    601   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
    602 
    603  private:
    604   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
    605 };
    606 
    607 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
    608  public:
    609   explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
    610 
    611   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    612     LocationSummary* locations = instruction_->GetLocations();
    613     __ Bind(GetEntryLabel());
    614     SaveLiveRegisters(codegen, locations);
    615 
    616     InvokeRuntimeCallingConvention calling_convention;
    617     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    618     parallel_move.AddMove(
    619         locations->InAt(0),
    620         LocationFrom(calling_convention.GetRegisterAt(0)),
    621         Primitive::kPrimNot,
    622         nullptr);
    623     parallel_move.AddMove(
    624         locations->InAt(1),
    625         LocationFrom(calling_convention.GetRegisterAt(1)),
    626         Primitive::kPrimInt,
    627         nullptr);
    628     parallel_move.AddMove(
    629         locations->InAt(2),
    630         LocationFrom(calling_convention.GetRegisterAt(2)),
    631         Primitive::kPrimNot,
    632         nullptr);
    633     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    634 
    635     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    636     arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    637     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    638     RestoreLiveRegisters(codegen, locations);
    639     __ B(GetExitLabel());
    640   }
    641 
    642   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
    643 
    644  private:
    645   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
    646 };
    647 
    648 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
    649   uint32_t num_entries = switch_instr_->GetNumEntries();
    650   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
    651 
    652   // We are about to use the assembler to place literals directly. Make sure we have enough
     653   // space in the underlying code buffer and that the jump table is generated with the right size.
    654   EmissionCheckScope scope(codegen->GetVIXLAssembler(),
    655                            num_entries * sizeof(int32_t),
    656                            CodeBufferCheckScope::kExactSize);
    657 
    658   __ Bind(&table_start_);
    659   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
    660   for (uint32_t i = 0; i < num_entries; i++) {
    661     vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
    662     DCHECK(target_label->IsBound());
    663     ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
    664     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
    665     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
    666     Literal<int32_t> literal(jump_offset);
    667     __ place(&literal);
    668   }
    669 }
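         // Each entry placed above is a signed 32-bit offset from `table_start_` to the successor's
         // label, so the dispatch sequence (emitted elsewhere, when the HPackedSwitch itself is visited)
         // only needs to load the selected entry and add it to the table's address to form the branch
         // target.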
    670 
    671 // Abstract base class for read barrier slow paths marking a reference
    672 // `ref`.
    673 //
    674 // Argument `entrypoint` must be a register location holding the read
    675 // barrier marking runtime entry point to be invoked or an empty
    676 // location; in the latter case, the read barrier marking runtime
    677 // entry point will be loaded by the slow path code itself.
    678 class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
    679  protected:
    680   ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
    681       : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
    682     DCHECK(kEmitCompilerReadBarrier);
    683   }
    684 
    685   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
    686 
    687   // Generate assembly code calling the read barrier marking runtime
    688   // entry point (ReadBarrierMarkRegX).
    689   void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
    690     // No need to save live registers; it's taken care of by the
    691     // entrypoint. Also, there is no need to update the stack mask,
    692     // as this runtime call will not trigger a garbage collection.
    693     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    694     DCHECK_NE(ref_.reg(), LR);
    695     DCHECK_NE(ref_.reg(), WSP);
    696     DCHECK_NE(ref_.reg(), WZR);
    697     // IP0 is used internally by the ReadBarrierMarkRegX entry point
     698     // as a temporary; it cannot be the entry point's input/output.
    699     DCHECK_NE(ref_.reg(), IP0);
    700     DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
    701     // "Compact" slow path, saving two moves.
    702     //
    703     // Instead of using the standard runtime calling convention (input
    704     // and output in W0):
    705     //
    706     //   W0 <- ref
    707     //   W0 <- ReadBarrierMark(W0)
    708     //   ref <- W0
    709     //
    710     // we just use rX (the register containing `ref`) as input and output
    711     // of a dedicated entrypoint:
    712     //
    713     //   rX <- ReadBarrierMarkRegX(rX)
    714     //
    715     if (entrypoint_.IsValid()) {
    716       arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
    717       __ Blr(XRegisterFrom(entrypoint_));
    718     } else {
     719       // The entrypoint is not already loaded; load it from the thread.
    720       int32_t entry_point_offset =
    721           Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
    722       // This runtime call does not require a stack map.
    723       arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    724     }
    725   }
    726 
    727   // The location (register) of the marked object reference.
    728   const Location ref_;
    729 
    730   // The location of the entrypoint if it is already loaded.
    731   const Location entrypoint_;
    732 
    733  private:
    734   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
    735 };
    736 
    737 // Slow path marking an object reference `ref` during a read
    738 // barrier. The field `obj.field` in the object `obj` holding this
    739 // reference does not get updated by this slow path after marking.
    740 //
    741 // This means that after the execution of this slow path, `ref` will
    742 // always be up-to-date, but `obj.field` may not; i.e., after the
    743 // flip, `ref` will be a to-space reference, but `obj.field` will
    744 // probably still be a from-space reference (unless it gets updated by
    745 // another thread, or if another thread installed another object
    746 // reference (different from `ref`) in `obj.field`).
    747 //
    748 // Argument `entrypoint` must be a register location holding the read
    749 // barrier marking runtime entry point to be invoked or an empty
    750 // location; in the latter case, the read barrier marking runtime
    751 // entry point will be loaded by the slow path code itself.
    752 class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
    753  public:
    754   ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
    755                                Location ref,
    756                                Location entrypoint = Location::NoLocation())
    757       : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
    758     DCHECK(kEmitCompilerReadBarrier);
    759   }
    760 
    761   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
    762 
    763   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    764     LocationSummary* locations = instruction_->GetLocations();
    765     DCHECK(locations->CanCall());
    766     DCHECK(ref_.IsRegister()) << ref_;
    767     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    768     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
    769         << "Unexpected instruction in read barrier marking slow path: "
    770         << instruction_->DebugName();
    771 
    772     __ Bind(GetEntryLabel());
    773     GenerateReadBarrierMarkRuntimeCall(codegen);
    774     __ B(GetExitLabel());
    775   }
    776 
    777  private:
    778   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
    779 };
    780 
    781 // Slow path loading `obj`'s lock word, loading a reference from
    782 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
    783 // marking `ref` if `obj` is gray according to the lock word (Baker
    784 // read barrier). The field `obj.field` in the object `obj` holding
    785 // this reference does not get updated by this slow path after marking
    786 // (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
    787 // below for that).
    788 //
    789 // This means that after the execution of this slow path, `ref` will
    790 // always be up-to-date, but `obj.field` may not; i.e., after the
    791 // flip, `ref` will be a to-space reference, but `obj.field` will
    792 // probably still be a from-space reference (unless it gets updated by
    793 // another thread, or if another thread installed another object
    794 // reference (different from `ref`) in `obj.field`).
    795 //
    796 // Argument `entrypoint` must be a register location holding the read
    797 // barrier marking runtime entry point to be invoked or an empty
    798 // location; in the latter case, the read barrier marking runtime
    799 // entry point will be loaded by the slow path code itself.
    800 class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
    801  public:
    802   LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
    803                                                  Location ref,
    804                                                  Register obj,
    805                                                  uint32_t offset,
    806                                                  Location index,
    807                                                  size_t scale_factor,
    808                                                  bool needs_null_check,
    809                                                  bool use_load_acquire,
    810                                                  Register temp,
    811                                                  Location entrypoint = Location::NoLocation())
    812       : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
    813         obj_(obj),
    814         offset_(offset),
    815         index_(index),
    816         scale_factor_(scale_factor),
    817         needs_null_check_(needs_null_check),
    818         use_load_acquire_(use_load_acquire),
    819         temp_(temp) {
    820     DCHECK(kEmitCompilerReadBarrier);
    821     DCHECK(kUseBakerReadBarrier);
    822   }
    823 
    824   const char* GetDescription() const OVERRIDE {
    825     return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
    826   }
    827 
    828   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    829     LocationSummary* locations = instruction_->GetLocations();
    830     DCHECK(locations->CanCall());
    831     DCHECK(ref_.IsRegister()) << ref_;
    832     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    833     DCHECK(obj_.IsW());
    834     DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
    835     DCHECK(instruction_->IsInstanceFieldGet() ||
    836            instruction_->IsStaticFieldGet() ||
    837            instruction_->IsArrayGet() ||
    838            instruction_->IsArraySet() ||
    839            instruction_->IsInstanceOf() ||
    840            instruction_->IsCheckCast() ||
    841            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
    842            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
    843         << "Unexpected instruction in read barrier marking slow path: "
    844         << instruction_->DebugName();
    845     // The read barrier instrumentation of object ArrayGet
    846     // instructions does not support the HIntermediateAddress
    847     // instruction.
    848     DCHECK(!(instruction_->IsArrayGet() &&
    849              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
    850 
    851     // Temporary register `temp_`, used to store the lock word, must
    852     // not be IP0 nor IP1, as we may use them to emit the reference
    853     // load (in the call to GenerateRawReferenceLoad below), and we
    854     // need the lock word to still be in `temp_` after the reference
    855     // load.
    856     DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    857     DCHECK_NE(LocationFrom(temp_).reg(), IP1);
    858 
    859     __ Bind(GetEntryLabel());
    860 
    861     // When using MaybeGenerateReadBarrierSlow, the read barrier call is
    862     // inserted after the original load. However, in fast path based
    863     // Baker's read barriers, we need to perform the load of
    864     // mirror::Object::monitor_ *before* the original reference load.
    865     // This load-load ordering is required by the read barrier.
    866     // The slow path (for Baker's algorithm) should look like:
    867     //
    868     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    869     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    870     //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    871     //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    872     //   if (is_gray) {
    873     //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    874     //   }
    875     //
    876     // Note: the original implementation in ReadBarrier::Barrier is
    877     // slightly more complex as it performs additional checks that we do
    878     // not do here for performance reasons.
    879 
    880     // /* int32_t */ monitor = obj->monitor_
    881     uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    882     __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    883     if (needs_null_check_) {
    884       codegen->MaybeRecordImplicitNullCheck(instruction_);
    885     }
    886     // /* LockWord */ lock_word = LockWord(monitor)
    887     static_assert(sizeof(LockWord) == sizeof(int32_t),
    888                   "art::LockWord and int32_t have different sizes.");
    889 
    890     // Introduce a dependency on the lock_word including rb_state,
    891     // to prevent load-load reordering, and without using
    892     // a memory barrier (which would be more expensive).
    893     // `obj` is unchanged by this operation, but its value now depends
    894     // on `temp`.
    895     __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
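             // (The bits shifted in are zero because the 32-bit load of the lock word zero-extends
             // `temp_` into its X view, which is what makes the add above a value-preserving no-op.)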
    896 
    897     // The actual reference load.
    898     // A possible implicit null check has already been handled above.
    899     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    900     arm64_codegen->GenerateRawReferenceLoad(instruction_,
    901                                             ref_,
    902                                             obj_,
    903                                             offset_,
    904                                             index_,
    905                                             scale_factor_,
    906                                             /* needs_null_check */ false,
    907                                             use_load_acquire_);
    908 
    909     // Mark the object `ref` when `obj` is gray.
    910     //
    911     //   if (rb_state == ReadBarrier::GrayState())
    912     //     ref = ReadBarrier::Mark(ref);
    913     //
    914     // Given the numeric representation, it's enough to check the low bit of the rb_state.
    915     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    916     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    917     __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
    918     GenerateReadBarrierMarkRuntimeCall(codegen);
    919 
    920     __ B(GetExitLabel());
    921   }
    922 
    923  private:
    924   // The register containing the object holding the marked object reference field.
    925   Register obj_;
    926   // The offset, index and scale factor to access the reference in `obj_`.
    927   uint32_t offset_;
    928   Location index_;
    929   size_t scale_factor_;
    930   // Is a null check required?
    931   bool needs_null_check_;
    932   // Should this reference load use Load-Acquire semantics?
    933   bool use_load_acquire_;
    934   // A temporary register used to hold the lock word of `obj_`.
    935   Register temp_;
    936 
    937   DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
    938 };
    939 
    940 // Slow path loading `obj`'s lock word, loading a reference from
    941 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
    942 // marking `ref` if `obj` is gray according to the lock word (Baker
    943 // read barrier). If needed, this slow path also atomically updates
    944 // the field `obj.field` in the object `obj` holding this reference
    945 // after marking (contrary to
    946 // LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
    947 // tries to update `obj.field`).
    948 //
    949 // This means that after the execution of this slow path, both `ref`
    950 // and `obj.field` will be up-to-date; i.e., after the flip, both will
    951 // hold the same to-space reference (unless another thread installed
    952 // another object reference (different from `ref`) in `obj.field`).
    953 //
    954 // Argument `entrypoint` must be a register location holding the read
    955 // barrier marking runtime entry point to be invoked or an empty
    956 // location; in the latter case, the read barrier marking runtime
    957 // entry point will be loaded by the slow path code itself.
    958 class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
    959     : public ReadBarrierMarkSlowPathBaseARM64 {
    960  public:
    961   LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
    962       HInstruction* instruction,
    963       Location ref,
    964       Register obj,
    965       uint32_t offset,
    966       Location index,
    967       size_t scale_factor,
    968       bool needs_null_check,
    969       bool use_load_acquire,
    970       Register temp,
    971       Location entrypoint = Location::NoLocation())
    972       : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
    973         obj_(obj),
    974         offset_(offset),
    975         index_(index),
    976         scale_factor_(scale_factor),
    977         needs_null_check_(needs_null_check),
    978         use_load_acquire_(use_load_acquire),
    979         temp_(temp) {
    980     DCHECK(kEmitCompilerReadBarrier);
    981     DCHECK(kUseBakerReadBarrier);
    982   }
    983 
    984   const char* GetDescription() const OVERRIDE {
    985     return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
    986   }
    987 
    988   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    989     LocationSummary* locations = instruction_->GetLocations();
    990     Register ref_reg = WRegisterFrom(ref_);
    991     DCHECK(locations->CanCall());
    992     DCHECK(ref_.IsRegister()) << ref_;
    993     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    994     DCHECK(obj_.IsW());
    995     DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
    996 
    997     // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
    998     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
    999         << "Unexpected instruction in read barrier marking and field updating slow path: "
   1000         << instruction_->DebugName();
   1001     DCHECK(instruction_->GetLocations()->Intrinsified());
   1002     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
   1003     DCHECK_EQ(offset_, 0u);
   1004     DCHECK_EQ(scale_factor_, 0u);
   1005     DCHECK_EQ(use_load_acquire_, false);
   1006     // The location of the offset of the marked reference field within `obj_`.
   1007     Location field_offset = index_;
   1008     DCHECK(field_offset.IsRegister()) << field_offset;
   1009 
   1010     // Temporary register `temp_`, used to store the lock word, must
   1011     // not be IP0 nor IP1, as we may use them to emit the reference
   1012     // load (in the call to GenerateRawReferenceLoad below), and we
   1013     // need the lock word to still be in `temp_` after the reference
   1014     // load.
   1015     DCHECK_NE(LocationFrom(temp_).reg(), IP0);
   1016     DCHECK_NE(LocationFrom(temp_).reg(), IP1);
   1017 
   1018     __ Bind(GetEntryLabel());
   1019 
   1020     // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
   1021     //
   1022     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   1023     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   1024     //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
   1025     //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   1026     //   if (is_gray) {
   1027     //     old_ref = ref;
   1028     //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
   1029     //     compareAndSwapObject(obj, field_offset, old_ref, ref);
   1030     //   }
   1031 
   1032     // /* int32_t */ monitor = obj->monitor_
   1033     uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   1034     __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
   1035     if (needs_null_check_) {
   1036       codegen->MaybeRecordImplicitNullCheck(instruction_);
   1037     }
   1038     // /* LockWord */ lock_word = LockWord(monitor)
   1039     static_assert(sizeof(LockWord) == sizeof(int32_t),
   1040                   "art::LockWord and int32_t have different sizes.");
   1041 
   1042     // Introduce a dependency on the lock_word including rb_state,
   1043     // to prevent load-load reordering, and without using
   1044     // a memory barrier (which would be more expensive).
   1045     // `obj` is unchanged by this operation, but its value now depends
   1046     // on `temp`.
   1047     __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
   1048 
   1049     // The actual reference load.
   1050     // A possible implicit null check has already been handled above.
   1051     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
   1052     arm64_codegen->GenerateRawReferenceLoad(instruction_,
   1053                                             ref_,
   1054                                             obj_,
   1055                                             offset_,
   1056                                             index_,
   1057                                             scale_factor_,
   1058                                             /* needs_null_check */ false,
   1059                                             use_load_acquire_);
   1060 
   1061     // Mark the object `ref` when `obj` is gray.
   1062     //
   1063     //   if (rb_state == ReadBarrier::GrayState())
   1064     //     ref = ReadBarrier::Mark(ref);
   1065     //
   1066     // Given the numeric representation, it's enough to check the low bit of the rb_state.
   1067     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   1068     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   1069     __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
   1070 
   1071     // Save the old value of the reference before marking it.
   1072     // Note that we cannot use IP to save the old reference, as IP is
   1073     // used internally by the ReadBarrierMarkRegX entry point, and we
   1074     // need the old reference after the call to that entry point.
   1075     DCHECK_NE(LocationFrom(temp_).reg(), IP0);
   1076     __ Mov(temp_.W(), ref_reg);
   1077 
   1078     GenerateReadBarrierMarkRuntimeCall(codegen);
   1079 
   1080     // If the new reference is different from the old reference,
   1081     // update the field in the holder (`*(obj_ + field_offset)`).
   1082     //
   1083     // Note that this field could also hold a different object, if
   1084     // another thread had concurrently changed it. In that case, the
   1085     // LDXR/CMP/BNE sequence of instructions in the compare-and-set
   1086     // (CAS) operation below would abort the CAS, leaving the field
   1087     // as-is.
   1088     __ Cmp(temp_.W(), ref_reg);
   1089     __ B(eq, GetExitLabel());
   1090 
    1091     // Update the holder's field atomically.  This may fail if a
    1092     // mutator updates it before us, but that's OK.  This is achieved
   1093     // using a strong compare-and-set (CAS) operation with relaxed
   1094     // memory synchronization ordering, where the expected value is
   1095     // the old reference and the desired value is the new reference.
   1096 
   1097     MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
   1098     UseScratchRegisterScope temps(masm);
   1099 
   1100     // Convenience aliases.
   1101     Register base = obj_.W();
   1102     Register offset = XRegisterFrom(field_offset);
   1103     Register expected = temp_.W();
   1104     Register value = ref_reg;
   1105     Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
   1106     Register tmp_value = temps.AcquireW();  // Value in memory.
   1107 
   1108     __ Add(tmp_ptr, base.X(), Operand(offset));
   1109 
   1110     if (kPoisonHeapReferences) {
   1111       arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
   1112       if (value.Is(expected)) {
   1113         // Do not poison `value`, as it is the same register as
   1114         // `expected`, which has just been poisoned.
   1115       } else {
   1116         arm64_codegen->GetAssembler()->PoisonHeapReference(value);
   1117       }
   1118     }
   1119 
   1120     // do {
   1121     //   tmp_value = [tmp_ptr] - expected;
    1122     //   } while (tmp_value == 0 && failure([tmp_ptr] <- value));
   1123 
   1124     vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
   1125     __ Bind(&loop_head);
   1126     __ Ldxr(tmp_value, MemOperand(tmp_ptr));
   1127     __ Cmp(tmp_value, expected);
   1128     __ B(&comparison_failed, ne);
   1129     __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
   1130     __ Cbnz(tmp_value, &loop_head);
   1131     __ B(&exit_loop);
   1132     __ Bind(&comparison_failed);
   1133     __ Clrex();
   1134     __ Bind(&exit_loop);
   1135 
   1136     if (kPoisonHeapReferences) {
   1137       arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
   1138       if (value.Is(expected)) {
   1139         // Do not unpoison `value`, as it is the same register as
   1140         // `expected`, which has just been unpoisoned.
   1141       } else {
   1142         arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
   1143       }
   1144     }
   1145 
   1146     __ B(GetExitLabel());
   1147   }
   1148 
   1149  private:
   1150   // The register containing the object holding the marked object reference field.
   1151   const Register obj_;
   1152   // The offset, index and scale factor to access the reference in `obj_`.
   1153   uint32_t offset_;
   1154   Location index_;
   1155   size_t scale_factor_;
   1156   // Is a null check required?
   1157   bool needs_null_check_;
   1158   // Should this reference load use Load-Acquire semantics?
   1159   bool use_load_acquire_;
    1160   // A temporary register used to hold the lock word of `obj_`, and
    1161   // also to hold the original reference value when the reference is
    1162   // marked.
   1163   const Register temp_;
   1164 
   1165   DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
   1166 };
   1167 
   1168 // Slow path generating a read barrier for a heap reference.
   1169 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
   1170  public:
   1171   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
   1172                                            Location out,
   1173                                            Location ref,
   1174                                            Location obj,
   1175                                            uint32_t offset,
   1176                                            Location index)
   1177       : SlowPathCodeARM64(instruction),
   1178         out_(out),
   1179         ref_(ref),
   1180         obj_(obj),
   1181         offset_(offset),
   1182         index_(index) {
   1183     DCHECK(kEmitCompilerReadBarrier);
   1184     // If `obj` is equal to `out` or `ref`, it means the initial object
   1185     // has been overwritten by (or after) the heap object reference load
   1186     // to be instrumented, e.g.:
   1187     //
    1188     //   __ Ldr(out, HeapOperand(out, class_offset));
   1189     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
   1190     //
   1191     // In that case, we have lost the information about the original
   1192     // object, and the emitted read barrier cannot work properly.
   1193     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
   1194     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
   1195   }
   1196 
   1197   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
   1198     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
   1199     LocationSummary* locations = instruction_->GetLocations();
   1200     Primitive::Type type = Primitive::kPrimNot;
   1201     DCHECK(locations->CanCall());
   1202     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
   1203     DCHECK(instruction_->IsInstanceFieldGet() ||
   1204            instruction_->IsStaticFieldGet() ||
   1205            instruction_->IsArrayGet() ||
   1206            instruction_->IsInstanceOf() ||
   1207            instruction_->IsCheckCast() ||
   1208            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
   1209         << "Unexpected instruction in read barrier for heap reference slow path: "
   1210         << instruction_->DebugName();
   1211     // The read barrier instrumentation of object ArrayGet
   1212     // instructions does not support the HIntermediateAddress
   1213     // instruction.
   1214     DCHECK(!(instruction_->IsArrayGet() &&
   1215              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
   1216 
   1217     __ Bind(GetEntryLabel());
   1218 
   1219     SaveLiveRegisters(codegen, locations);
   1220 
   1221     // We may have to change the index's value, but as `index_` is a
   1222     // constant member (like other "inputs" of this slow path),
   1223     // introduce a copy of it, `index`.
   1224     Location index = index_;
   1225     if (index_.IsValid()) {
   1226       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
   1227       if (instruction_->IsArrayGet()) {
   1228         // Compute the actual memory offset and store it in `index`.
   1229         Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
   1230         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
   1231         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
   1232           // We are about to change the value of `index_reg` (see the
   1233           // calls to vixl::MacroAssembler::Lsl and
   1234           // vixl::MacroAssembler::Mov below), but it has
   1235           // not been saved by the previous call to
   1236           // art::SlowPathCode::SaveLiveRegisters, as it is a
   1237           // callee-save register --
   1238           // art::SlowPathCode::SaveLiveRegisters does not consider
   1239           // callee-save registers, as it has been designed with the
   1240           // assumption that callee-save registers are supposed to be
   1241           // handled by the called function.  So, as a callee-save
   1242           // register, `index_reg` _would_ eventually be saved onto
   1243           // the stack, but it would be too late: we would have
   1244           // changed its value earlier.  Therefore, we manually save
   1245           // it here into another freely available register,
   1246           // `free_reg`, chosen of course among the caller-save
   1247           // registers (as a callee-save `free_reg` register would
   1248           // exhibit the same problem).
   1249           //
   1250           // Note we could have requested a temporary register from
   1251           // the register allocator instead; but we prefer not to, as
   1252           // this is a slow path, and we know we can find a
   1253           // caller-save register that is available.
   1254           Register free_reg = FindAvailableCallerSaveRegister(codegen);
   1255           __ Mov(free_reg.W(), index_reg);
   1256           index_reg = free_reg;
   1257           index = LocationFrom(index_reg);
   1258         } else {
   1259           // The initial register stored in `index_` has already been
   1260           // saved in the call to art::SlowPathCode::SaveLiveRegisters
   1261           // (as it is not a callee-save register), so we can freely
   1262           // use it.
   1263         }
   1264         // Shifting the index value contained in `index_reg` by the scale
   1265         // factor (2) cannot overflow in practice, as the runtime is
   1266         // unable to allocate object arrays with a size larger than
   1267         // 2^26 - 1 (that is, 2^28 - 4 bytes).
   1268         __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
   1269         static_assert(
   1270             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   1271             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   1272         __ Add(index_reg, index_reg, Operand(offset_));
   1273       } else {
   1274         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
   1275         // intrinsics, `index_` is not shifted by a scale factor of 2
   1276         // (as in the case of ArrayGet), as it is actually an offset
   1277         // to an object field within an object.
   1278         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
   1279         DCHECK(instruction_->GetLocations()->Intrinsified());
   1280         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
   1281                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
   1282             << instruction_->AsInvoke()->GetIntrinsic();
   1283         DCHECK_EQ(offset_, 0u);
   1284         DCHECK(index_.IsRegister());
   1285       }
   1286     }
   1287 
   1288     // We're moving two or three locations to locations that could
   1289     // overlap, so we need a parallel move resolver.
   1290     InvokeRuntimeCallingConvention calling_convention;
   1291     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
   1292     parallel_move.AddMove(ref_,
   1293                           LocationFrom(calling_convention.GetRegisterAt(0)),
   1294                           type,
   1295                           nullptr);
   1296     parallel_move.AddMove(obj_,
   1297                           LocationFrom(calling_convention.GetRegisterAt(1)),
   1298                           type,
   1299                           nullptr);
   1300     if (index.IsValid()) {
   1301       parallel_move.AddMove(index,
   1302                             LocationFrom(calling_convention.GetRegisterAt(2)),
   1303                             Primitive::kPrimInt,
   1304                             nullptr);
   1305       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
   1306     } else {
   1307       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
   1308       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
   1309     }
   1310     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
   1311                                  instruction_,
   1312                                  instruction_->GetDexPc(),
   1313                                  this);
   1314     CheckEntrypointTypes<
   1315         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
   1316     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
   1317 
   1318     RestoreLiveRegisters(codegen, locations);
   1319 
   1320     __ B(GetExitLabel());
   1321   }
   1322 
   1323   const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
   1324 
   1325  private:
   1326   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
   1327     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
   1328     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
   1329     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
   1330       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
   1331         return Register(VIXLRegCodeFromART(i), kXRegSize);
   1332       }
   1333     }
   1334     // We shall never fail to find a free caller-save register, as
   1335     // there are more than two core caller-save registers on ARM64
   1336     // (meaning it is possible to find one which is different from
   1337     // `ref` and `obj`).
   1338     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
   1339     LOG(FATAL) << "Could not find a free register";
   1340     UNREACHABLE();
   1341   }
   1342 
   1343   const Location out_;
   1344   const Location ref_;
   1345   const Location obj_;
   1346   const uint32_t offset_;
   1347   // An additional location containing an index to an array.
   1348   // Only used for HArrayGet and the UnsafeGetObject &
   1349   // UnsafeGetObjectVolatile intrinsics.
   1350   const Location index_;
   1351 
   1352   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
   1353 };
   1354 
   1355 // Slow path generating a read barrier for a GC root.
   1356 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
   1357  public:
   1358   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
   1359       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
   1360     DCHECK(kEmitCompilerReadBarrier);
   1361   }
   1362 
   1363   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
   1364     LocationSummary* locations = instruction_->GetLocations();
   1365     Primitive::Type type = Primitive::kPrimNot;
   1366     DCHECK(locations->CanCall());
   1367     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
   1368     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
   1369         << "Unexpected instruction in read barrier for GC root slow path: "
   1370         << instruction_->DebugName();
   1371 
   1372     __ Bind(GetEntryLabel());
   1373     SaveLiveRegisters(codegen, locations);
   1374 
   1375     InvokeRuntimeCallingConvention calling_convention;
   1376     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
   1377     // The argument of the ReadBarrierForRootSlow is not a managed
   1378     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
   1379     // thus we need a 64-bit move here, and we cannot use
   1380     //
   1381     //   arm64_codegen->MoveLocation(
   1382     //       LocationFrom(calling_convention.GetRegisterAt(0)),
   1383     //       root_,
   1384     //       type);
   1385     //
   1386     // which would emit a 32-bit move, as `type` is a (32-bit wide)
   1387     // reference type (`Primitive::kPrimNot`).
   1388     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
   1389     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
   1390                                  instruction_,
   1391                                  instruction_->GetDexPc(),
   1392                                  this);
   1393     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
   1394     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
   1395 
   1396     RestoreLiveRegisters(codegen, locations);
   1397     __ B(GetExitLabel());
   1398   }
   1399 
   1400   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
   1401 
   1402  private:
   1403   const Location out_;
   1404   const Location root_;
   1405 
   1406   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
   1407 };
   1408 
   1409 #undef __
   1410 
   1411 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
   1412   Location next_location;
   1413   if (type == Primitive::kPrimVoid) {
   1414     LOG(FATAL) << "Unreachable type " << type;
   1415   }
   1416 
   1417   if (Primitive::IsFloatingPointType(type) &&
   1418       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
   1419     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
   1420   } else if (!Primitive::IsFloatingPointType(type) &&
   1421              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
   1422     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
   1423   } else {
   1424     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
   1425     next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
   1426                                                  : Location::StackSlot(stack_offset);
   1427   }
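           // For illustration only (register names assume the standard ART ARM64
           // managed calling convention, x1-x7 / d0-d7 for arguments, with x0
           // holding the ArtMethod*): a signature (long, float, int, double,
           // reference) is assigned x1, d0, w2, d1 and w3 in turn; once a register
           // pool is exhausted, the remaining arguments of that kind fall through
           // to stack slots.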
   1428 
   1429   // Space on the stack is reserved for all arguments.
   1430   stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
   1431   return next_location;
   1432 }
   1433 
   1434 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
   1435   return LocationFrom(kArtMethodRegister);
   1436 }
   1437 
   1438 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
   1439                                        const Arm64InstructionSetFeatures& isa_features,
   1440                                        const CompilerOptions& compiler_options,
   1441                                        OptimizingCompilerStats* stats)
   1442     : CodeGenerator(graph,
   1443                     kNumberOfAllocatableRegisters,
   1444                     kNumberOfAllocatableFPRegisters,
   1445                     kNumberOfAllocatableRegisterPairs,
   1446                     callee_saved_core_registers.GetList(),
   1447                     callee_saved_fp_registers.GetList(),
   1448                     compiler_options,
   1449                     stats),
   1450       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1451       jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1452       location_builder_(graph, this),
   1453       instruction_visitor_(graph, this),
   1454       move_resolver_(graph->GetArena(), this),
   1455       assembler_(graph->GetArena()),
   1456       isa_features_(isa_features),
   1457       uint32_literals_(std::less<uint32_t>(),
   1458                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1459       uint64_literals_(std::less<uint64_t>(),
   1460                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1461       pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1462       method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1463       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1464       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1465       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1466       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1467       jit_string_patches_(StringReferenceValueComparator(),
   1468                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1469       jit_class_patches_(TypeReferenceValueComparator(),
   1470                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   1471   // Save the link register (containing the return address) to mimic Quick.
   1472   AddAllocatedRegister(LocationFrom(lr));
   1473 }
   1474 
   1475 #define __ GetVIXLAssembler()->
   1476 
   1477 void CodeGeneratorARM64::EmitJumpTables() {
   1478   for (auto&& jump_table : jump_tables_) {
   1479     jump_table->EmitTable(this);
   1480   }
   1481 }
   1482 
   1483 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
   1484   EmitJumpTables();
   1485   // Ensure we emit the literal pool.
   1486   __ FinalizeCode();
   1487 
   1488   CodeGenerator::Finalize(allocator);
   1489 }
   1490 
   1491 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
   1492   // Note: There are 6 kinds of moves:
   1493   // 1. constant -> GPR/FPR (non-cycle)
   1494   // 2. constant -> stack (non-cycle)
   1495   // 3. GPR/FPR -> GPR/FPR
   1496   // 4. GPR/FPR -> stack
   1497   // 5. stack -> GPR/FPR
   1498   // 6. stack -> stack (non-cycle)
    1499   // Cases 1, 2 and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4 and 5
    1500   // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
    1501   // cycles on ARM64, so we always have 1 GPR and 1 FPR VIXL temp available to resolve the
    1502   // dependency.
   1503   vixl_temps_.Open(GetVIXLAssembler());
   1504 }
   1505 
   1506 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
   1507   vixl_temps_.Close();
   1508 }
   1509 
   1510 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
   1511   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
   1512          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
   1513          || kind == Location::kSIMDStackSlot);
   1514   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
   1515       ? Location::kFpuRegister
   1516       : Location::kRegister;
   1517   Location scratch = GetScratchLocation(kind);
   1518   if (!scratch.Equals(Location::NoLocation())) {
   1519     return scratch;
   1520   }
   1521   // Allocate from VIXL temp registers.
   1522   if (kind == Location::kRegister) {
   1523     scratch = LocationFrom(vixl_temps_.AcquireX());
   1524   } else {
   1525     DCHECK_EQ(kind, Location::kFpuRegister);
   1526     scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
   1527         ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
   1528         : vixl_temps_.AcquireD());
   1529   }
   1530   AddScratchLocation(scratch);
   1531   return scratch;
   1532 }
   1533 
   1534 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
   1535   if (loc.IsRegister()) {
   1536     vixl_temps_.Release(XRegisterFrom(loc));
   1537   } else {
   1538     DCHECK(loc.IsFpuRegister());
   1539     vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
   1540   }
   1541   RemoveScratchLocation(loc);
   1542 }
   1543 
   1544 void ParallelMoveResolverARM64::EmitMove(size_t index) {
   1545   MoveOperands* move = moves_[index];
   1546   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
   1547 }
   1548 
   1549 void CodeGeneratorARM64::GenerateFrameEntry() {
   1550   MacroAssembler* masm = GetVIXLAssembler();
   1551   __ Bind(&frame_entry_label_);
   1552 
   1553   bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
   1554   if (do_overflow_check) {
   1555     UseScratchRegisterScope temps(masm);
   1556     Register temp = temps.AcquireX();
   1557     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
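             // The probe below is the implicit stack overflow check: it loads from
             // an address GetStackOverflowReservedBytes(kArm64) below SP, roughly
             //   sub temp, sp, #reserved_bytes
             //   ldr wzr, [temp]
             // so a frame that would overflow the stack faults here, and the
             // runtime's fault handler turns the fault into a StackOverflowError
             // using the pc recorded just below.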
   1558     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
   1559     {
    1560       // Ensure that no pools are emitted between the load and RecordPcInfo.
   1561       ExactAssemblyScope eas(GetVIXLAssembler(),
   1562                              kInstructionSize,
   1563                              CodeBufferCheckScope::kExactSize);
   1564       __ ldr(wzr, MemOperand(temp, 0));
   1565       RecordPcInfo(nullptr, 0);
   1566     }
   1567   }
   1568 
   1569   if (!HasEmptyFrame()) {
   1570     int frame_size = GetFrameSize();
   1571     // Stack layout:
   1572     //      sp[frame_size - 8]        : lr.
   1573     //      ...                       : other preserved core registers.
   1574     //      ...                       : other preserved fp registers.
   1575     //      ...                       : reserved frame space.
   1576     //      sp[0]                     : current method.
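             //
             // Purely illustrative example (the exact offsets depend on the spill
             // masks): with frame_size == 64, core spills {x20, x21, lr} and no FP
             // spills, lr would live at sp[56], x21 at sp[48], x20 at sp[40], the
             // ArtMethod* at sp[0], and sp[8..39] would be reserved frame space.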
   1577 
   1578     // Save the current method if we need it. Note that we do not
   1579     // do this in HCurrentMethod, as the instruction might have been removed
   1580     // in the SSA graph.
   1581     if (RequiresCurrentMethod()) {
   1582       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
   1583     } else {
   1584       __ Claim(frame_size);
   1585     }
   1586     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
   1587     GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
   1588         frame_size - GetCoreSpillSize());
   1589     GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
   1590         frame_size - FrameEntrySpillSize());
   1591 
   1592     if (GetGraph()->HasShouldDeoptimizeFlag()) {
   1593       // Initialize should_deoptimize flag to 0.
   1594       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
   1595       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
   1596     }
   1597   }
   1598 }
   1599 
   1600 void CodeGeneratorARM64::GenerateFrameExit() {
   1601   GetAssembler()->cfi().RememberState();
   1602   if (!HasEmptyFrame()) {
   1603     int frame_size = GetFrameSize();
   1604     GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
   1605         frame_size - FrameEntrySpillSize());
   1606     GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
   1607         frame_size - GetCoreSpillSize());
   1608     __ Drop(frame_size);
   1609     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
   1610   }
   1611   __ Ret();
   1612   GetAssembler()->cfi().RestoreState();
   1613   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
   1614 }
   1615 
   1616 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
   1617   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
   1618   return CPURegList(CPURegister::kRegister, kXRegSize,
   1619                     core_spill_mask_);
   1620 }
   1621 
   1622 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
   1623   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
   1624                                          GetNumberOfFloatingPointRegisters()));
   1625   return CPURegList(CPURegister::kFPRegister, kDRegSize,
   1626                     fpu_spill_mask_);
   1627 }
   1628 
   1629 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
   1630   __ Bind(GetLabelOf(block));
   1631 }
   1632 
   1633 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
   1634   DCHECK(location.IsRegister());
   1635   __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
   1636 }
   1637 
   1638 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
   1639   if (location.IsRegister()) {
   1640     locations->AddTemp(location);
   1641   } else {
   1642     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
   1643   }
   1644 }
   1645 
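         // Write barrier: mark the card covering `object` in the card table so that
         // the GC re-scans it. A descriptive sketch, assuming the usual ART
         // card-table scheme:
         //   card_base[object >> kCardShift] = kCardDirty;
         // where the byte stored is the low byte of `card_base` itself, which the
         // runtime arranges to equal the dirty-card value, so no extra constant
         // needs to be materialized.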
   1646 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
   1647   UseScratchRegisterScope temps(GetVIXLAssembler());
   1648   Register card = temps.AcquireX();
   1649   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
   1650   vixl::aarch64::Label done;
   1651   if (value_can_be_null) {
   1652     __ Cbz(value, &done);
   1653   }
   1654   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
   1655   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
   1656   __ Strb(card, MemOperand(card, temp.X()));
   1657   if (value_can_be_null) {
   1658     __ Bind(&done);
   1659   }
   1660 }
   1661 
   1662 void CodeGeneratorARM64::SetupBlockedRegisters() const {
   1663   // Blocked core registers:
   1664   //      lr        : Runtime reserved.
   1665   //      tr        : Runtime reserved.
   1666   //      mr        : Runtime reserved.
   1667   //      ip1       : VIXL core temp.
   1668   //      ip0       : VIXL core temp.
   1669   //
   1670   // Blocked fp registers:
   1671   //      d31       : VIXL fp temp.
   1672   CPURegList reserved_core_registers = vixl_reserved_core_registers;
   1673   reserved_core_registers.Combine(runtime_reserved_core_registers);
   1674   while (!reserved_core_registers.IsEmpty()) {
   1675     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
   1676   }
   1677 
   1678   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
   1679   while (!reserved_fp_registers.IsEmpty()) {
   1680     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
   1681   }
   1682 
   1683   if (GetGraph()->IsDebuggable()) {
   1684     // Stubs do not save callee-save floating point registers. If the graph
   1685     // is debuggable, we need to deal with these registers differently. For
   1686     // now, just block them.
   1687     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
   1688     while (!reserved_fp_registers_debuggable.IsEmpty()) {
   1689       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
   1690     }
   1691   }
   1692 }
   1693 
   1694 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   1695   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   1696   __ Str(reg, MemOperand(sp, stack_index));
   1697   return kArm64WordSize;
   1698 }
   1699 
   1700 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
   1701   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   1702   __ Ldr(reg, MemOperand(sp, stack_index));
   1703   return kArm64WordSize;
   1704 }
   1705 
   1706 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   1707   FPRegister reg = FPRegister(reg_id, kDRegSize);
   1708   __ Str(reg, MemOperand(sp, stack_index));
   1709   return kArm64WordSize;
   1710 }
   1711 
   1712 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   1713   FPRegister reg = FPRegister(reg_id, kDRegSize);
   1714   __ Ldr(reg, MemOperand(sp, stack_index));
   1715   return kArm64WordSize;
   1716 }
   1717 
   1718 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
   1719   stream << XRegister(reg);
   1720 }
   1721 
   1722 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
   1723   stream << DRegister(reg);
   1724 }
   1725 
   1726 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
   1727   if (constant->IsIntConstant()) {
   1728     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
   1729   } else if (constant->IsLongConstant()) {
   1730     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
   1731   } else if (constant->IsNullConstant()) {
   1732     __ Mov(Register(destination), 0);
   1733   } else if (constant->IsFloatConstant()) {
   1734     __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
   1735   } else {
   1736     DCHECK(constant->IsDoubleConstant());
   1737     __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
   1738   }
   1739 }
   1740 
   1741 
   1742 static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
   1743   DCHECK(constant.IsConstant());
   1744   HConstant* cst = constant.GetConstant();
   1745   return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
   1746          // Null is mapped to a core W register, which we associate with kPrimInt.
   1747          (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
   1748          (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
   1749          (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
   1750          (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
   1751 }
   1752 
   1753 // Allocate a scratch register from the VIXL pool, querying first
   1754 // the floating-point register pool, and then the core register
   1755 // pool. This is essentially a reimplementation of
   1756 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
   1757 // using a different allocation strategy.
   1758 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
   1759                                                     vixl::aarch64::UseScratchRegisterScope* temps,
   1760                                                     int size_in_bits) {
   1761   return masm->GetScratchFPRegisterList()->IsEmpty()
   1762       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
   1763       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
   1764 }
   1765 
   1766 void CodeGeneratorARM64::MoveLocation(Location destination,
   1767                                       Location source,
   1768                                       Primitive::Type dst_type) {
   1769   if (source.Equals(destination)) {
   1770     return;
   1771   }
   1772 
   1773   // A valid move can always be inferred from the destination and source
   1774   // locations. When moving from and to a register, the argument type can be
   1775   // used to generate 32bit instead of 64bit moves. In debug mode we also
    1776   // check the coherency of the locations and the type.
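           // For example (illustrative): a register-to-register move with an
           // unspecified type is performed as a 64bit move (mov xd, xs or
           // fmov dd, ds), which is always safe, whereas a move from a 32bit stack
           // slot is typed as kPrimInt / kPrimFloat so that a 32bit load is emitted.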
   1777   bool unspecified_type = (dst_type == Primitive::kPrimVoid);
   1778 
   1779   if (destination.IsRegister() || destination.IsFpuRegister()) {
   1780     if (unspecified_type) {
   1781       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
   1782       if (source.IsStackSlot() ||
   1783           (src_cst != nullptr && (src_cst->IsIntConstant()
   1784                                   || src_cst->IsFloatConstant()
   1785                                   || src_cst->IsNullConstant()))) {
    1786         // For stack slots and 32bit constants, a 32bit type is appropriate.
   1787         dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
   1788       } else {
   1789         // If the source is a double stack slot or a 64bit constant, a 64bit
   1790         // type is appropriate. Else the source is a register, and since the
    1791         // type has not been specified, we choose a 64bit type to force a 64bit
   1792         // move.
   1793         dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
   1794       }
   1795     }
   1796     DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) ||
   1797            (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type)));
   1798     CPURegister dst = CPURegisterFrom(destination, dst_type);
   1799     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
   1800       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
   1801       __ Ldr(dst, StackOperandFrom(source));
   1802     } else if (source.IsSIMDStackSlot()) {
   1803       __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
   1804     } else if (source.IsConstant()) {
   1805       DCHECK(CoherentConstantAndType(source, dst_type));
   1806       MoveConstant(dst, source.GetConstant());
   1807     } else if (source.IsRegister()) {
   1808       if (destination.IsRegister()) {
   1809         __ Mov(Register(dst), RegisterFrom(source, dst_type));
   1810       } else {
   1811         DCHECK(destination.IsFpuRegister());
   1812         Primitive::Type source_type = Primitive::Is64BitType(dst_type)
   1813             ? Primitive::kPrimLong
   1814             : Primitive::kPrimInt;
   1815         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
   1816       }
   1817     } else {
   1818       DCHECK(source.IsFpuRegister());
   1819       if (destination.IsRegister()) {
   1820         Primitive::Type source_type = Primitive::Is64BitType(dst_type)
   1821             ? Primitive::kPrimDouble
   1822             : Primitive::kPrimFloat;
   1823         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
   1824       } else {
   1825         DCHECK(destination.IsFpuRegister());
   1826         if (GetGraph()->HasSIMD()) {
   1827           __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
   1828         } else {
   1829           __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
   1830         }
   1831       }
   1832     }
   1833   } else if (destination.IsSIMDStackSlot()) {
   1834     if (source.IsFpuRegister()) {
   1835       __ Str(QRegisterFrom(source), StackOperandFrom(destination));
   1836     } else {
   1837       DCHECK(source.IsSIMDStackSlot());
   1838       UseScratchRegisterScope temps(GetVIXLAssembler());
   1839       if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
   1840         Register temp = temps.AcquireX();
   1841         __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
   1842         __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
   1843         __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
   1844         __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
   1845       } else {
   1846         FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
   1847         __ Ldr(temp, StackOperandFrom(source));
   1848         __ Str(temp, StackOperandFrom(destination));
   1849       }
   1850     }
   1851   } else {  // The destination is not a register. It must be a stack slot.
   1852     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
   1853     if (source.IsRegister() || source.IsFpuRegister()) {
   1854       if (unspecified_type) {
   1855         if (source.IsRegister()) {
   1856           dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
   1857         } else {
   1858           dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
   1859         }
   1860       }
   1861       DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) &&
   1862              (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type)));
   1863       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
   1864     } else if (source.IsConstant()) {
   1865       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
   1866           << source << " " << dst_type;
   1867       UseScratchRegisterScope temps(GetVIXLAssembler());
   1868       HConstant* src_cst = source.GetConstant();
   1869       CPURegister temp;
   1870       if (src_cst->IsZeroBitPattern()) {
   1871         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
   1872             ? Register(xzr)
   1873             : Register(wzr);
   1874       } else {
   1875         if (src_cst->IsIntConstant()) {
   1876           temp = temps.AcquireW();
   1877         } else if (src_cst->IsLongConstant()) {
   1878           temp = temps.AcquireX();
   1879         } else if (src_cst->IsFloatConstant()) {
   1880           temp = temps.AcquireS();
   1881         } else {
   1882           DCHECK(src_cst->IsDoubleConstant());
   1883           temp = temps.AcquireD();
   1884         }
   1885         MoveConstant(temp, src_cst);
   1886       }
   1887       __ Str(temp, StackOperandFrom(destination));
   1888     } else {
   1889       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
   1890       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
   1891       UseScratchRegisterScope temps(GetVIXLAssembler());
   1892       // Use any scratch register (a core or a floating-point one)
   1893       // from VIXL scratch register pools as a temporary.
   1894       //
   1895       // We used to only use the FP scratch register pool, but in some
   1896       // rare cases the only register from this pool (D31) would
   1897       // already be used (e.g. within a ParallelMove instruction, when
    1898       // a move is blocked by another move requiring a scratch FP
   1899       // register, which would reserve D31). To prevent this issue, we
   1900       // ask for a scratch register of any type (core or FP).
   1901       //
    1902       // Also, we ask for an FP scratch register first, as the
    1903       // demand for scratch core registers is higher. This is why we
   1904       // use AcquireFPOrCoreCPURegisterOfSize instead of
   1905       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
   1906       // allocates core scratch registers first.
   1907       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
   1908           GetVIXLAssembler(),
   1909           &temps,
   1910           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
   1911       __ Ldr(temp, StackOperandFrom(source));
   1912       __ Str(temp, StackOperandFrom(destination));
   1913     }
   1914   }
   1915 }
   1916 
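         // Note on the narrow loads below: boolean and char are zero-extended
         // (Ldrb/Ldrh) while byte and short are sign-extended (Ldrsb/Ldrsh),
         // matching the Java semantics of those types.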
   1917 void CodeGeneratorARM64::Load(Primitive::Type type,
   1918                               CPURegister dst,
   1919                               const MemOperand& src) {
   1920   switch (type) {
   1921     case Primitive::kPrimBoolean:
   1922       __ Ldrb(Register(dst), src);
   1923       break;
   1924     case Primitive::kPrimByte:
   1925       __ Ldrsb(Register(dst), src);
   1926       break;
   1927     case Primitive::kPrimShort:
   1928       __ Ldrsh(Register(dst), src);
   1929       break;
   1930     case Primitive::kPrimChar:
   1931       __ Ldrh(Register(dst), src);
   1932       break;
   1933     case Primitive::kPrimInt:
   1934     case Primitive::kPrimNot:
   1935     case Primitive::kPrimLong:
   1936     case Primitive::kPrimFloat:
   1937     case Primitive::kPrimDouble:
   1938       DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
   1939       __ Ldr(dst, src);
   1940       break;
   1941     case Primitive::kPrimVoid:
   1942       LOG(FATAL) << "Unreachable type " << type;
   1943   }
   1944 }
   1945 
   1946 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
   1947                                      CPURegister dst,
   1948                                      const MemOperand& src,
   1949                                      bool needs_null_check) {
   1950   MacroAssembler* masm = GetVIXLAssembler();
   1951   UseScratchRegisterScope temps(masm);
   1952   Register temp_base = temps.AcquireX();
   1953   Primitive::Type type = instruction->GetType();
   1954 
   1955   DCHECK(!src.IsPreIndex());
   1956   DCHECK(!src.IsPostIndex());
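           // Note: AArch64 has no sign-extending load-acquire, so the byte and short
           // cases below load with ldarb/ldarh and then sign-extend with Sbfx, and
           // FP values are loaded through a core register because ldar has no FP
           // form.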
   1957 
   1958   // TODO(vixl): Let the MacroAssembler handle MemOperand.
   1959   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
   1960   {
    1961     // Ensure that no pools are emitted between the load and MaybeRecordImplicitNullCheck.
   1962     MemOperand base = MemOperand(temp_base);
   1963     switch (type) {
   1964       case Primitive::kPrimBoolean:
   1965         {
   1966           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1967           __ ldarb(Register(dst), base);
   1968           if (needs_null_check) {
   1969             MaybeRecordImplicitNullCheck(instruction);
   1970           }
   1971         }
   1972         break;
   1973       case Primitive::kPrimByte:
   1974         {
   1975           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1976           __ ldarb(Register(dst), base);
   1977           if (needs_null_check) {
   1978             MaybeRecordImplicitNullCheck(instruction);
   1979           }
   1980         }
   1981         __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
   1982         break;
   1983       case Primitive::kPrimChar:
   1984         {
   1985           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1986           __ ldarh(Register(dst), base);
   1987           if (needs_null_check) {
   1988             MaybeRecordImplicitNullCheck(instruction);
   1989           }
   1990         }
   1991         break;
   1992       case Primitive::kPrimShort:
   1993         {
   1994           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1995           __ ldarh(Register(dst), base);
   1996           if (needs_null_check) {
   1997             MaybeRecordImplicitNullCheck(instruction);
   1998           }
   1999         }
   2000         __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
   2001         break;
   2002       case Primitive::kPrimInt:
   2003       case Primitive::kPrimNot:
   2004       case Primitive::kPrimLong:
   2005         DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
   2006         {
   2007           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2008           __ ldar(Register(dst), base);
   2009           if (needs_null_check) {
   2010             MaybeRecordImplicitNullCheck(instruction);
   2011           }
   2012         }
   2013         break;
   2014       case Primitive::kPrimFloat:
   2015       case Primitive::kPrimDouble: {
   2016         DCHECK(dst.IsFPRegister());
   2017         DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
   2018 
   2019         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
   2020         {
   2021           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2022           __ ldar(temp, base);
   2023           if (needs_null_check) {
   2024             MaybeRecordImplicitNullCheck(instruction);
   2025           }
   2026         }
   2027         __ Fmov(FPRegister(dst), temp);
   2028         break;
   2029       }
   2030       case Primitive::kPrimVoid:
   2031         LOG(FATAL) << "Unreachable type " << type;
   2032     }
   2033   }
   2034 }
   2035 
   2036 void CodeGeneratorARM64::Store(Primitive::Type type,
   2037                                CPURegister src,
   2038                                const MemOperand& dst) {
   2039   switch (type) {
   2040     case Primitive::kPrimBoolean:
   2041     case Primitive::kPrimByte:
   2042       __ Strb(Register(src), dst);
   2043       break;
   2044     case Primitive::kPrimChar:
   2045     case Primitive::kPrimShort:
   2046       __ Strh(Register(src), dst);
   2047       break;
   2048     case Primitive::kPrimInt:
   2049     case Primitive::kPrimNot:
   2050     case Primitive::kPrimLong:
   2051     case Primitive::kPrimFloat:
   2052     case Primitive::kPrimDouble:
   2053       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
   2054       __ Str(src, dst);
   2055       break;
   2056     case Primitive::kPrimVoid:
   2057       LOG(FATAL) << "Unreachable type " << type;
   2058   }
   2059 }
   2060 
   2061 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
   2062                                       Primitive::Type type,
   2063                                       CPURegister src,
   2064                                       const MemOperand& dst,
   2065                                       bool needs_null_check) {
   2066   MacroAssembler* masm = GetVIXLAssembler();
   2067   UseScratchRegisterScope temps(GetVIXLAssembler());
   2068   Register temp_base = temps.AcquireX();
   2069 
   2070   DCHECK(!dst.IsPreIndex());
   2071   DCHECK(!dst.IsPostIndex());
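           // Note: stlrb/stlrh/stlr only take core registers, so FP values are first
           // moved to a core temp (or the zero register is used directly when the
           // value is a zero bit pattern) before the store-release below.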
   2072 
   2073   // TODO(vixl): Let the MacroAssembler handle this.
   2074   Operand op = OperandFromMemOperand(dst);
   2075   __ Add(temp_base, dst.GetBaseRegister(), op);
   2076   MemOperand base = MemOperand(temp_base);
    2077   // Ensure that no pools are emitted between the store and MaybeRecordImplicitNullCheck.
   2078   switch (type) {
   2079     case Primitive::kPrimBoolean:
   2080     case Primitive::kPrimByte:
   2081       {
   2082         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2083         __ stlrb(Register(src), base);
   2084         if (needs_null_check) {
   2085           MaybeRecordImplicitNullCheck(instruction);
   2086         }
   2087       }
   2088       break;
   2089     case Primitive::kPrimChar:
   2090     case Primitive::kPrimShort:
   2091       {
   2092         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2093         __ stlrh(Register(src), base);
   2094         if (needs_null_check) {
   2095           MaybeRecordImplicitNullCheck(instruction);
   2096         }
   2097       }
   2098       break;
   2099     case Primitive::kPrimInt:
   2100     case Primitive::kPrimNot:
   2101     case Primitive::kPrimLong:
   2102       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
   2103       {
   2104         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2105         __ stlr(Register(src), base);
   2106         if (needs_null_check) {
   2107           MaybeRecordImplicitNullCheck(instruction);
   2108         }
   2109       }
   2110       break;
   2111     case Primitive::kPrimFloat:
   2112     case Primitive::kPrimDouble: {
   2113       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
   2114       Register temp_src;
   2115       if (src.IsZero()) {
   2116         // The zero register is used to avoid synthesizing zero constants.
   2117         temp_src = Register(src);
   2118       } else {
   2119         DCHECK(src.IsFPRegister());
   2120         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
   2121         __ Fmov(temp_src, FPRegister(src));
   2122       }
   2123       {
   2124         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2125         __ stlr(temp_src, base);
   2126         if (needs_null_check) {
   2127           MaybeRecordImplicitNullCheck(instruction);
   2128         }
   2129       }
   2130       break;
   2131     }
   2132     case Primitive::kPrimVoid:
   2133       LOG(FATAL) << "Unreachable type " << type;
   2134   }
   2135 }
   2136 
   2137 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
   2138                                        HInstruction* instruction,
   2139                                        uint32_t dex_pc,
   2140                                        SlowPathCode* slow_path) {
   2141   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
   2142 
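           // In effect (illustrative): load the entrypoint's code pointer from the
           // Thread object and call it,
           //   ldr lr, [tr, #<entrypoint offset>]
           //   blr lr
           // recording a stack map right after the blr when the entrypoint requires
           // one.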
   2143   __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
   2144   {
   2145     // Ensure the pc position is recorded immediately after the `blr` instruction.
   2146     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
   2147     __ blr(lr);
   2148     if (EntrypointRequiresStackMap(entrypoint)) {
   2149       RecordPcInfo(instruction, dex_pc, slow_path);
   2150     }
   2151   }
   2152 }
   2153 
   2154 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
   2155                                                              HInstruction* instruction,
   2156                                                              SlowPathCode* slow_path) {
   2157   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
   2158   __ Ldr(lr, MemOperand(tr, entry_point_offset));
   2159   __ Blr(lr);
   2160 }
   2161 
   2162 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
   2163                                                                      Register class_reg) {
   2164   UseScratchRegisterScope temps(GetVIXLAssembler());
   2165   Register temp = temps.AcquireW();
   2166   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
   2167 
   2168   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
   2169   // TODO(vixl): Let the MacroAssembler handle MemOperand.
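           // In pseudocode (illustrative):
           //   if (class_reg->status_ < kStatusInitialized) goto slow_path;
           // where the status is read with acquire semantics (Ldar) so that fields
           // written by the class initializer are visible once the class is observed
           // as initialized.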
   2170   __ Add(temp, class_reg, status_offset);
   2171   __ Ldar(temp, HeapOperand(temp));
   2172   __ Cmp(temp, mirror::Class::kStatusInitialized);
   2173   __ B(lt, slow_path->GetEntryLabel());
   2174   __ Bind(slow_path->GetExitLabel());
   2175 }
   2176 
   2177 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
   2178   BarrierType type = BarrierAll;
   2179 
   2180   switch (kind) {
   2181     case MemBarrierKind::kAnyAny:
   2182     case MemBarrierKind::kAnyStore: {
   2183       type = BarrierAll;
   2184       break;
   2185     }
   2186     case MemBarrierKind::kLoadAny: {
   2187       type = BarrierReads;
   2188       break;
   2189     }
   2190     case MemBarrierKind::kStoreStore: {
   2191       type = BarrierWrites;
   2192       break;
   2193     }
   2194     default:
   2195       LOG(FATAL) << "Unexpected memory barrier " << kind;
   2196   }
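           // The mapping above yields, roughly, `dmb ish`, `dmb ishld` or `dmb ishst`
           // for BarrierAll, BarrierReads and BarrierWrites respectively, all with
           // inner-shareable scope.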
   2197   __ Dmb(InnerShareable, type);
   2198 }
   2199 
   2200 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
   2201                                                          HBasicBlock* successor) {
   2202   SuspendCheckSlowPathARM64* slow_path =
   2203       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
   2204   if (slow_path == nullptr) {
   2205     slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
   2206     instruction->SetSlowPath(slow_path);
   2207     codegen_->AddSlowPath(slow_path);
   2208     if (successor != nullptr) {
   2209       DCHECK(successor->IsLoopHeader());
   2210       codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
   2211     }
   2212   } else {
   2213     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   2214   }
   2215 
   2216   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   2217   Register temp = temps.AcquireW();
   2218 
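           // Illustrative pseudocode for the poll below (`thread_flags` stands for
           // the 16-bit flags field read via Thread::ThreadFlagsOffset and is named
           // here only for illustration):
           //   if (self->thread_flags != 0) goto slow_path;  // e.g. suspend requested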
   2219   __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
   2220   if (successor == nullptr) {
   2221     __ Cbnz(temp, slow_path->GetEntryLabel());
   2222     __ Bind(slow_path->GetReturnLabel());
   2223   } else {
   2224     __ Cbz(temp, codegen_->GetLabelOf(successor));
   2225     __ B(slow_path->GetEntryLabel());
   2226     // slow_path will return to GetLabelOf(successor).
   2227   }
   2228 }
   2229 
   2230 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
   2231                                                              CodeGeneratorARM64* codegen)
   2232       : InstructionCodeGenerator(graph, codegen),
   2233         assembler_(codegen->GetAssembler()),
   2234         codegen_(codegen) {}
   2235 
   2236 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
   2237   /* No unimplemented IR. */
   2238 
   2239 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
   2240 
   2241 enum UnimplementedInstructionBreakCode {
   2242   // Using a base helps identify when we hit such breakpoints.
   2243   UnimplementedInstructionBreakCodeBaseCode = 0x900,
   2244 #define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name),
   2245   FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION)
   2246 #undef ENUM_UNIMPLEMENTED_INSTRUCTION
   2247 };
   2248 
   2249 #define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name)                               \
   2250   void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) {  \
   2251     __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name));                               \
   2252   }                                                                                   \
   2253   void LocationsBuilderARM64::Visit##name(H##name* instr) {                           \
   2254     LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \
   2255     locations->SetOut(Location::Any());                                               \
   2256   }
   2257   FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS)
   2258 #undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS
   2259 
   2260 #undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE
   2261 #undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION
   2262 
   2263 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
   2264   DCHECK_EQ(instr->InputCount(), 2U);
   2265   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   2266   Primitive::Type type = instr->GetResultType();
   2267   switch (type) {
   2268     case Primitive::kPrimInt:
   2269     case Primitive::kPrimLong:
   2270       locations->SetInAt(0, Location::RequiresRegister());
   2271       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
   2272       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2273       break;
   2274 
   2275     case Primitive::kPrimFloat:
   2276     case Primitive::kPrimDouble:
   2277       locations->SetInAt(0, Location::RequiresFpuRegister());
   2278       locations->SetInAt(1, Location::RequiresFpuRegister());
   2279       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   2280       break;
   2281 
   2282     default:
   2283       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
   2284   }
   2285 }
   2286 
   2287 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
   2288                                            const FieldInfo& field_info) {
   2289   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
   2290 
   2291   bool object_field_get_with_read_barrier =
   2292       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   2293   LocationSummary* locations =
   2294       new (GetGraph()->GetArena()) LocationSummary(instruction,
   2295                                                    object_field_get_with_read_barrier ?
   2296                                                        LocationSummary::kCallOnSlowPath :
   2297                                                        LocationSummary::kNoCall);
   2298   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
   2299     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   2300     // We need a temporary register for the read barrier marking slow
   2301     // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
   2302     if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
   2303         !Runtime::Current()->UseJitCompilation() &&
   2304         !field_info.IsVolatile()) {
   2305       // If link-time thunks for the Baker read barrier are enabled, for AOT
   2306       // non-volatile loads we need a temporary only if the offset is too big.
   2307       if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
   2308         locations->AddTemp(FixedTempLocation());
   2309       }
   2310     } else {
   2311       locations->AddTemp(Location::RequiresRegister());
   2312     }
   2313   }
   2314   locations->SetInAt(0, Location::RequiresRegister());
   2315   if (Primitive::IsFloatingPointType(instruction->GetType())) {
   2316     locations->SetOut(Location::RequiresFpuRegister());
   2317   } else {
   2318     // The output overlaps for an object field get when read barriers
   2319     // are enabled: we do not want the load to overwrite the object's
   2320     // location, as we need it to emit the read barrier.
   2321     locations->SetOut(
   2322         Location::RequiresRegister(),
   2323         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   2324   }
   2325 }
   2326 
   2327 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
   2328                                                    const FieldInfo& field_info) {
   2329   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
   2330   LocationSummary* locations = instruction->GetLocations();
   2331   Location base_loc = locations->InAt(0);
   2332   Location out = locations->Out();
   2333   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   2334   Primitive::Type field_type = field_info.GetFieldType();
   2335   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
   2336 
   2337   if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2338     // Object FieldGet with Baker's read barrier case.
   2339     // /* HeapReference<Object> */ out = *(base + offset)
   2340     Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
   2341     Location maybe_temp =
   2342         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
   2343     // Note that potential implicit null checks are handled in this
   2344     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
   2345     codegen_->GenerateFieldLoadWithBakerReadBarrier(
   2346         instruction,
   2347         out,
   2348         base,
   2349         offset,
   2350         maybe_temp,
   2351         /* needs_null_check */ true,
   2352         field_info.IsVolatile());
   2353   } else {
   2354     // General case.
   2355     if (field_info.IsVolatile()) {
   2356       // Note that a potential implicit null check is handled in this
   2357       // CodeGeneratorARM64::LoadAcquire call.
   2358       // NB: LoadAcquire will record the pc info if needed.
   2359       codegen_->LoadAcquire(
   2360           instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
   2361     } else {
   2362       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   2363       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   2364       codegen_->Load(field_type, OutputCPURegister(instruction), field);
   2365       codegen_->MaybeRecordImplicitNullCheck(instruction);
   2366     }
   2367     if (field_type == Primitive::kPrimNot) {
   2368       // If read barriers are enabled, emit read barriers other than
   2369       // Baker's using a slow path (and also unpoison the loaded
   2370       // reference, if heap poisoning is enabled).
   2371       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
   2372     }
   2373   }
   2374 }
   2375 
   2376 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
   2377   LocationSummary* locations =
   2378       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   2379   locations->SetInAt(0, Location::RequiresRegister());
   2380   if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
   2381     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
   2382   } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
   2383     locations->SetInAt(1, Location::RequiresFpuRegister());
   2384   } else {
   2385     locations->SetInAt(1, Location::RequiresRegister());
   2386   }
   2387 }
   2388 
   2389 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
   2390                                                    const FieldInfo& field_info,
   2391                                                    bool value_can_be_null) {
   2392   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
   2393 
   2394   Register obj = InputRegisterAt(instruction, 0);
   2395   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
   2396   CPURegister source = value;
   2397   Offset offset = field_info.GetFieldOffset();
   2398   Primitive::Type field_type = field_info.GetFieldType();
   2399 
   2400   {
   2401     // We use a block to end the scratch scope before the write barrier, thus
   2402     // freeing the temporary registers so they can be used in `MarkGCCard`.
   2403     UseScratchRegisterScope temps(GetVIXLAssembler());
   2404 
   2405     if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
   2406       DCHECK(value.IsW());
   2407       Register temp = temps.AcquireW();
   2408       __ Mov(temp, value.W());
   2409       GetAssembler()->PoisonHeapReference(temp.W());
   2410       source = temp;
   2411     }
   2412 
   2413     if (field_info.IsVolatile()) {
   2414       codegen_->StoreRelease(
   2415           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
   2416     } else {
   2417       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
   2418       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   2419       codegen_->Store(field_type, source, HeapOperand(obj, offset));
   2420       codegen_->MaybeRecordImplicitNullCheck(instruction);
   2421     }
   2422   }
   2423 
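           // Object reference stores need a write barrier: mark the GC card for `obj` so the
           // collector knows it may now hold a reference that needs to be traced.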
   2424   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
   2425     codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
   2426   }
   2427 }
   2428 
   2429 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
   2430   Primitive::Type type = instr->GetType();
   2431 
   2432   switch (type) {
   2433     case Primitive::kPrimInt:
   2434     case Primitive::kPrimLong: {
   2435       Register dst = OutputRegister(instr);
   2436       Register lhs = InputRegisterAt(instr, 0);
   2437       Operand rhs = InputOperandAt(instr, 1);
   2438       if (instr->IsAdd()) {
   2439         __ Add(dst, lhs, rhs);
   2440       } else if (instr->IsAnd()) {
   2441         __ And(dst, lhs, rhs);
   2442       } else if (instr->IsOr()) {
   2443         __ Orr(dst, lhs, rhs);
   2444       } else if (instr->IsSub()) {
   2445         __ Sub(dst, lhs, rhs);
   2446       } else if (instr->IsRor()) {
   2447         if (rhs.IsImmediate()) {
   2448           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
   2449           __ Ror(dst, lhs, shift);
   2450         } else {
    2451           // Ensure the shift distance is in a register of the same size as the result. If
    2452           // we are rotating a long and the shift originally comes in a W register, we do
    2453           // not need to sxtw it for use as an X register, since shift distances are always
    2454           // masked with (reg_bits - 1).
   2455           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
   2456         }
   2457       } else {
   2458         DCHECK(instr->IsXor());
   2459         __ Eor(dst, lhs, rhs);
   2460       }
   2461       break;
   2462     }
   2463     case Primitive::kPrimFloat:
   2464     case Primitive::kPrimDouble: {
   2465       FPRegister dst = OutputFPRegister(instr);
   2466       FPRegister lhs = InputFPRegisterAt(instr, 0);
   2467       FPRegister rhs = InputFPRegisterAt(instr, 1);
   2468       if (instr->IsAdd()) {
   2469         __ Fadd(dst, lhs, rhs);
   2470       } else if (instr->IsSub()) {
   2471         __ Fsub(dst, lhs, rhs);
   2472       } else {
   2473         LOG(FATAL) << "Unexpected floating-point binary operation";
   2474       }
   2475       break;
   2476     }
   2477     default:
   2478       LOG(FATAL) << "Unexpected binary operation type " << type;
   2479   }
   2480 }
   2481 
   2482 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
   2483   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
   2484 
   2485   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   2486   Primitive::Type type = instr->GetResultType();
   2487   switch (type) {
   2488     case Primitive::kPrimInt:
   2489     case Primitive::kPrimLong: {
   2490       locations->SetInAt(0, Location::RequiresRegister());
   2491       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
   2492       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2493       break;
   2494     }
   2495     default:
   2496       LOG(FATAL) << "Unexpected shift type " << type;
   2497   }
   2498 }
   2499 
   2500 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
   2501   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
   2502 
   2503   Primitive::Type type = instr->GetType();
   2504   switch (type) {
   2505     case Primitive::kPrimInt:
   2506     case Primitive::kPrimLong: {
   2507       Register dst = OutputRegister(instr);
   2508       Register lhs = InputRegisterAt(instr, 0);
   2509       Operand rhs = InputOperandAt(instr, 1);
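               // Java shift distances are taken modulo the operand width. Immediates are masked
               // here; for register shifts the AArch64 variable-shift instructions mask in hardware.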
   2510       if (rhs.IsImmediate()) {
   2511         uint32_t shift_value = rhs.GetImmediate() &
   2512             (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
   2513         if (instr->IsShl()) {
   2514           __ Lsl(dst, lhs, shift_value);
   2515         } else if (instr->IsShr()) {
   2516           __ Asr(dst, lhs, shift_value);
   2517         } else {
   2518           __ Lsr(dst, lhs, shift_value);
   2519         }
   2520       } else {
   2521         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
   2522 
   2523         if (instr->IsShl()) {
   2524           __ Lsl(dst, lhs, rhs_reg);
   2525         } else if (instr->IsShr()) {
   2526           __ Asr(dst, lhs, rhs_reg);
   2527         } else {
   2528           __ Lsr(dst, lhs, rhs_reg);
   2529         }
   2530       }
   2531       break;
   2532     }
   2533     default:
   2534       LOG(FATAL) << "Unexpected shift operation type " << type;
   2535   }
   2536 }
   2537 
   2538 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
   2539   HandleBinaryOp(instruction);
   2540 }
   2541 
   2542 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
   2543   HandleBinaryOp(instruction);
   2544 }
   2545 
   2546 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
   2547   HandleBinaryOp(instruction);
   2548 }
   2549 
   2550 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
   2551   HandleBinaryOp(instruction);
   2552 }
   2553 
   2554 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
   2555   DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
   2556   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   2557   locations->SetInAt(0, Location::RequiresRegister());
   2558   // There is no immediate variant of negated bitwise instructions in AArch64.
   2559   locations->SetInAt(1, Location::RequiresRegister());
   2560   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2561 }
   2562 
   2563 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
   2564   Register dst = OutputRegister(instr);
   2565   Register lhs = InputRegisterAt(instr, 0);
   2566   Register rhs = InputRegisterAt(instr, 1);
   2567 
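           // Each negated bitwise operation maps directly to one AArch64 instruction:
           // BIC (and-not), ORN (or-not) and EON (xor-not).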
   2568   switch (instr->GetOpKind()) {
   2569     case HInstruction::kAnd:
   2570       __ Bic(dst, lhs, rhs);
   2571       break;
   2572     case HInstruction::kOr:
   2573       __ Orn(dst, lhs, rhs);
   2574       break;
   2575     case HInstruction::kXor:
   2576       __ Eon(dst, lhs, rhs);
   2577       break;
   2578     default:
   2579       LOG(FATAL) << "Unreachable";
   2580   }
   2581 }
   2582 
   2583 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
   2584     HDataProcWithShifterOp* instruction) {
   2585   DCHECK(instruction->GetType() == Primitive::kPrimInt ||
   2586          instruction->GetType() == Primitive::kPrimLong);
   2587   LocationSummary* locations =
   2588       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   2589   if (instruction->GetInstrKind() == HInstruction::kNeg) {
   2590     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
   2591   } else {
   2592     locations->SetInAt(0, Location::RequiresRegister());
   2593   }
   2594   locations->SetInAt(1, Location::RequiresRegister());
   2595   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2596 }
   2597 
   2598 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
   2599     HDataProcWithShifterOp* instruction) {
   2600   Primitive::Type type = instruction->GetType();
   2601   HInstruction::InstructionKind kind = instruction->GetInstrKind();
   2602   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
   2603   Register out = OutputRegister(instruction);
   2604   Register left;
   2605   if (kind != HInstruction::kNeg) {
   2606     left = InputRegisterAt(instruction, 0);
   2607   }
   2608   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
   2609   // shifter operand operation, the IR generating `right_reg` (input to the type
   2610   // conversion) can have a different type from the current instruction's type,
   2611   // so we manually indicate the type.
   2612   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
   2613   Operand right_operand(0);
   2614 
   2615   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
   2616   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
   2617     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
   2618   } else {
   2619     right_operand = Operand(right_reg,
   2620                             helpers::ShiftFromOpKind(op_kind),
   2621                             instruction->GetShiftAmount());
   2622   }
   2623 
    2624   // Logical binary operations do not support extension operations in the
    2625   // operand. Note that VIXL would still cope with such an operand by generating
    2626   // the extension as a separate instruction.
    2627   // `HNeg` does not support extension either. See comments in `ShifterOperandSupportsExtension()`.
   2628   DCHECK(!right_operand.IsExtendedRegister() ||
   2629          (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
   2630           kind != HInstruction::kNeg));
   2631   switch (kind) {
   2632     case HInstruction::kAdd:
   2633       __ Add(out, left, right_operand);
   2634       break;
   2635     case HInstruction::kAnd:
   2636       __ And(out, left, right_operand);
   2637       break;
   2638     case HInstruction::kNeg:
   2639       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
   2640       __ Neg(out, right_operand);
   2641       break;
   2642     case HInstruction::kOr:
   2643       __ Orr(out, left, right_operand);
   2644       break;
   2645     case HInstruction::kSub:
   2646       __ Sub(out, left, right_operand);
   2647       break;
   2648     case HInstruction::kXor:
   2649       __ Eor(out, left, right_operand);
   2650       break;
   2651     default:
   2652       LOG(FATAL) << "Unexpected operation kind: " << kind;
   2653       UNREACHABLE();
   2654   }
   2655 }
   2656 
   2657 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
   2658   LocationSummary* locations =
   2659       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   2660   locations->SetInAt(0, Location::RequiresRegister());
   2661   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
   2662   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2663 }
   2664 
   2665 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
   2666   __ Add(OutputRegister(instruction),
   2667          InputRegisterAt(instruction, 0),
    2668          InputOperandAt(instruction, 1));
   2669 }
   2670 
   2671 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
   2672   LocationSummary* locations =
   2673       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   2674 
   2675   HIntConstant* shift = instruction->GetShift()->AsIntConstant();
   2676 
   2677   locations->SetInAt(0, Location::RequiresRegister());
    2678   // For the byte case we do not need to shift the index variable, so we can encode the data
    2679   // offset into the ADD instruction. For the other cases we prefer the data offset to be in a
    2680   // register; that hoists the data offset constant generation out of the loop and reduces the
    2681   // critical path length in the loop.
   2682   locations->SetInAt(1, shift->GetValue() == 0
   2683                         ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
   2684                         : Location::RequiresRegister());
   2685   locations->SetInAt(2, Location::ConstantLocation(shift));
   2686   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2687 }
   2688 
   2689 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
   2690     HIntermediateAddressIndex* instruction) {
   2691   Register index_reg = InputRegisterAt(instruction, 0);
   2692   uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
   2693   uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
   2694 
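           // For byte-sized elements (shift == 0) the constant data offset was folded into this
           // ADD; otherwise the offset is already in a register and the index is shifted into place.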
   2695   if (shift == 0) {
   2696     __ Add(OutputRegister(instruction), index_reg, offset);
   2697   } else {
   2698     Register offset_reg = InputRegisterAt(instruction, 1);
   2699     __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
   2700   }
   2701 }
   2702 
   2703 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   2704   LocationSummary* locations =
   2705       new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
   2706   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
   2707   if (instr->GetOpKind() == HInstruction::kSub &&
   2708       accumulator->IsConstant() &&
   2709       accumulator->AsConstant()->IsArithmeticZero()) {
    2710     // Don't allocate a register for the Mneg instruction.
   2711   } else {
   2712     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
   2713                        Location::RequiresRegister());
   2714   }
   2715   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
   2716   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
   2717   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2718 }
   2719 
   2720 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   2721   Register res = OutputRegister(instr);
   2722   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
   2723   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
   2724 
   2725   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
   2726   // This fixup should be carried out for all multiply-accumulate instructions:
   2727   // madd, msub, smaddl, smsubl, umaddl and umsubl.
   2728   if (instr->GetType() == Primitive::kPrimLong &&
   2729       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
   2730     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
   2731     vixl::aarch64::Instruction* prev =
   2732         masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
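             // The erratum may be triggered when a 64-bit multiply-accumulate directly follows a
             // load or store; emitting a nop in between breaks the problematic sequence.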
   2733     if (prev->IsLoadOrStore()) {
    2734       // Make sure we emit exactly one nop.
   2735       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2736       __ nop();
   2737     }
   2738   }
   2739 
   2740   if (instr->GetOpKind() == HInstruction::kAdd) {
   2741     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
   2742     __ Madd(res, mul_left, mul_right, accumulator);
   2743   } else {
   2744     DCHECK(instr->GetOpKind() == HInstruction::kSub);
   2745     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
   2746     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
   2747       __ Mneg(res, mul_left, mul_right);
   2748     } else {
   2749       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
   2750       __ Msub(res, mul_left, mul_right, accumulator);
   2751     }
   2752   }
   2753 }
   2754 
   2755 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
   2756   bool object_array_get_with_read_barrier =
   2757       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   2758   LocationSummary* locations =
   2759       new (GetGraph()->GetArena()) LocationSummary(instruction,
   2760                                                    object_array_get_with_read_barrier ?
   2761                                                        LocationSummary::kCallOnSlowPath :
   2762                                                        LocationSummary::kNoCall);
   2763   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
   2764     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   2765     // We need a temporary register for the read barrier marking slow
   2766     // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
   2767     if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
   2768         !Runtime::Current()->UseJitCompilation() &&
   2769         instruction->GetIndex()->IsConstant()) {
   2770       // Array loads with constant index are treated as field loads.
   2771       // If link-time thunks for the Baker read barrier are enabled, for AOT
   2772       // constant index loads we need a temporary only if the offset is too big.
   2773       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
   2774       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
   2775       offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
   2776       if (offset >= kReferenceLoadMinFarOffset) {
   2777         locations->AddTemp(FixedTempLocation());
   2778       }
   2779     } else {
   2780       locations->AddTemp(Location::RequiresRegister());
   2781     }
   2782   }
   2783   locations->SetInAt(0, Location::RequiresRegister());
   2784   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   2785   if (Primitive::IsFloatingPointType(instruction->GetType())) {
   2786     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   2787   } else {
   2788     // The output overlaps in the case of an object array get with
   2789     // read barriers enabled: we do not want the move to overwrite the
   2790     // array's location, as we need it to emit the read barrier.
   2791     locations->SetOut(
   2792         Location::RequiresRegister(),
   2793         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   2794   }
   2795 }
   2796 
   2797 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
   2798   Primitive::Type type = instruction->GetType();
   2799   Register obj = InputRegisterAt(instruction, 0);
   2800   LocationSummary* locations = instruction->GetLocations();
   2801   Location index = locations->InAt(1);
   2802   Location out = locations->Out();
   2803   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
   2804   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
   2805                                         instruction->IsStringCharAt();
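           // String.charAt() on a potentially compressed string must check the compression flag
           // and load either a byte (compressed) or a half-word (uncompressed) character.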
   2806   MacroAssembler* masm = GetVIXLAssembler();
   2807   UseScratchRegisterScope temps(masm);
   2808 
   2809   // The read barrier instrumentation of object ArrayGet instructions
   2810   // does not support the HIntermediateAddress instruction.
   2811   DCHECK(!((type == Primitive::kPrimNot) &&
   2812            instruction->GetArray()->IsIntermediateAddress() &&
   2813            kEmitCompilerReadBarrier));
   2814 
   2815   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   2816     // Object ArrayGet with Baker's read barrier case.
   2817     // Note that a potential implicit null check is handled in the
   2818     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
   2819     DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
   2820     if (index.IsConstant()) {
   2821       // Array load with a constant index can be treated as a field load.
   2822       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
   2823       Location maybe_temp =
   2824           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
   2825       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
   2826                                                       out,
   2827                                                       obj.W(),
   2828                                                       offset,
   2829                                                       maybe_temp,
   2830                                                       /* needs_null_check */ false,
   2831                                                       /* use_load_acquire */ false);
   2832     } else {
   2833       Register temp = WRegisterFrom(locations->GetTemp(0));
   2834       codegen_->GenerateArrayLoadWithBakerReadBarrier(
   2835           instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
   2836     }
   2837   } else {
   2838     // General case.
   2839     MemOperand source = HeapOperand(obj);
   2840     Register length;
   2841     if (maybe_compressed_char_at) {
   2842       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   2843       length = temps.AcquireW();
   2844       {
   2845         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   2846         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   2847 
   2848         if (instruction->GetArray()->IsIntermediateAddress()) {
   2849           DCHECK_LT(count_offset, offset);
   2850           int64_t adjusted_offset =
   2851               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
   2852           // Note that `adjusted_offset` is negative, so this will be a LDUR.
   2853           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
   2854         } else {
   2855           __ Ldr(length, HeapOperand(obj, count_offset));
   2856         }
   2857         codegen_->MaybeRecordImplicitNullCheck(instruction);
   2858       }
   2859     }
   2860     if (index.IsConstant()) {
   2861       if (maybe_compressed_char_at) {
   2862         vixl::aarch64::Label uncompressed_load, done;
   2863         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   2864                       "Expecting 0=compressed, 1=uncompressed");
   2865         __ Tbnz(length.W(), 0, &uncompressed_load);
   2866         __ Ldrb(Register(OutputCPURegister(instruction)),
   2867                 HeapOperand(obj, offset + Int64ConstantFrom(index)));
   2868         __ B(&done);
   2869         __ Bind(&uncompressed_load);
   2870         __ Ldrh(Register(OutputCPURegister(instruction)),
   2871                 HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
   2872         __ Bind(&done);
   2873       } else {
   2874         offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
   2875         source = HeapOperand(obj, offset);
   2876       }
   2877     } else {
   2878       Register temp = temps.AcquireSameSizeAs(obj);
   2879       if (instruction->GetArray()->IsIntermediateAddress()) {
   2880         // We do not need to compute the intermediate address from the array: the
   2881         // input instruction has done it already. See the comment in
   2882         // `TryExtractArrayAccessAddress()`.
   2883         if (kIsDebugBuild) {
   2884           HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
   2885           DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
   2886         }
   2887         temp = obj;
   2888       } else {
   2889         __ Add(temp, obj, offset);
   2890       }
   2891       if (maybe_compressed_char_at) {
   2892         vixl::aarch64::Label uncompressed_load, done;
   2893         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   2894                       "Expecting 0=compressed, 1=uncompressed");
   2895         __ Tbnz(length.W(), 0, &uncompressed_load);
   2896         __ Ldrb(Register(OutputCPURegister(instruction)),
   2897                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
   2898         __ B(&done);
   2899         __ Bind(&uncompressed_load);
   2900         __ Ldrh(Register(OutputCPURegister(instruction)),
   2901                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
   2902         __ Bind(&done);
   2903       } else {
   2904         source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
   2905       }
   2906     }
   2907     if (!maybe_compressed_char_at) {
   2908       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   2909       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   2910       codegen_->Load(type, OutputCPURegister(instruction), source);
   2911       codegen_->MaybeRecordImplicitNullCheck(instruction);
   2912     }
   2913 
   2914     if (type == Primitive::kPrimNot) {
   2915       static_assert(
   2916           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   2917           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   2918       Location obj_loc = locations->InAt(0);
   2919       if (index.IsConstant()) {
   2920         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
   2921       } else {
   2922         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
   2923       }
   2924     }
   2925   }
   2926 }
   2927 
   2928 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
   2929   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   2930   locations->SetInAt(0, Location::RequiresRegister());
   2931   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2932 }
   2933 
   2934 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
   2935   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   2936   vixl::aarch64::Register out = OutputRegister(instruction);
   2937   {
   2938     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   2939     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   2940     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
   2941     codegen_->MaybeRecordImplicitNullCheck(instruction);
   2942   }
   2943   // Mask out compression flag from String's array length.
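           // With string compression the count field stores (length << 1) with the compression
           // state in the least significant bit, hence the logical shift right by one.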
   2944   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
   2945     __ Lsr(out.W(), out.W(), 1u);
   2946   }
   2947 }
   2948 
   2949 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
   2950   Primitive::Type value_type = instruction->GetComponentType();
   2951 
   2952   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   2953   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
   2954       instruction,
   2955       may_need_runtime_call_for_type_check ?
   2956           LocationSummary::kCallOnSlowPath :
   2957           LocationSummary::kNoCall);
   2958   locations->SetInAt(0, Location::RequiresRegister());
   2959   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   2960   if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
   2961     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
   2962   } else if (Primitive::IsFloatingPointType(value_type)) {
   2963     locations->SetInAt(2, Location::RequiresFpuRegister());
   2964   } else {
   2965     locations->SetInAt(2, Location::RequiresRegister());
   2966   }
   2967 }
   2968 
   2969 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   2970   Primitive::Type value_type = instruction->GetComponentType();
   2971   LocationSummary* locations = instruction->GetLocations();
   2972   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   2973   bool needs_write_barrier =
   2974       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   2975 
   2976   Register array = InputRegisterAt(instruction, 0);
   2977   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
   2978   CPURegister source = value;
   2979   Location index = locations->InAt(1);
   2980   size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
   2981   MemOperand destination = HeapOperand(array);
   2982   MacroAssembler* masm = GetVIXLAssembler();
   2983 
   2984   if (!needs_write_barrier) {
   2985     DCHECK(!may_need_runtime_call_for_type_check);
   2986     if (index.IsConstant()) {
   2987       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
   2988       destination = HeapOperand(array, offset);
   2989     } else {
   2990       UseScratchRegisterScope temps(masm);
   2991       Register temp = temps.AcquireSameSizeAs(array);
   2992       if (instruction->GetArray()->IsIntermediateAddress()) {
   2993         // We do not need to compute the intermediate address from the array: the
   2994         // input instruction has done it already. See the comment in
   2995         // `TryExtractArrayAccessAddress()`.
   2996         if (kIsDebugBuild) {
   2997           HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
    2998           DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
   2999         }
   3000         temp = array;
   3001       } else {
   3002         __ Add(temp, array, offset);
   3003       }
   3004       destination = HeapOperand(temp,
   3005                                 XRegisterFrom(index),
   3006                                 LSL,
   3007                                 Primitive::ComponentSizeShift(value_type));
   3008     }
   3009     {
   3010       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
   3011       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   3012       codegen_->Store(value_type, value, destination);
   3013       codegen_->MaybeRecordImplicitNullCheck(instruction);
   3014     }
   3015   } else {
   3016     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
   3017     vixl::aarch64::Label done;
   3018     SlowPathCodeARM64* slow_path = nullptr;
   3019     {
   3020       // We use a block to end the scratch scope before the write barrier, thus
   3021       // freeing the temporary registers so they can be used in `MarkGCCard`.
   3022       UseScratchRegisterScope temps(masm);
   3023       Register temp = temps.AcquireSameSizeAs(array);
   3024       if (index.IsConstant()) {
   3025         offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
   3026         destination = HeapOperand(array, offset);
   3027       } else {
   3028         destination = HeapOperand(temp,
   3029                                   XRegisterFrom(index),
   3030                                   LSL,
   3031                                   Primitive::ComponentSizeShift(value_type));
   3032       }
   3033 
   3034       uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   3035       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   3036       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   3037 
   3038       if (may_need_runtime_call_for_type_check) {
   3039         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
   3040         codegen_->AddSlowPath(slow_path);
   3041         if (instruction->GetValueCanBeNull()) {
   3042           vixl::aarch64::Label non_zero;
   3043           __ Cbnz(Register(value), &non_zero);
   3044           if (!index.IsConstant()) {
   3045             __ Add(temp, array, offset);
   3046           }
   3047           {
   3048             // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
   3049             // emitted.
   3050             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   3051             __ Str(wzr, destination);
   3052             codegen_->MaybeRecordImplicitNullCheck(instruction);
   3053           }
   3054           __ B(&done);
   3055           __ Bind(&non_zero);
   3056         }
   3057 
   3058         // Note that when Baker read barriers are enabled, the type
   3059         // checks are performed without read barriers.  This is fine,
   3060         // even in the case where a class object is in the from-space
   3061         // after the flip, as a comparison involving such a type would
   3062         // not produce a false positive; it may of course produce a
   3063         // false negative, in which case we would take the ArraySet
   3064         // slow path.
   3065 
   3066         Register temp2 = temps.AcquireSameSizeAs(array);
   3067         // /* HeapReference<Class> */ temp = array->klass_
   3068         {
   3069           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   3070           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   3071           __ Ldr(temp, HeapOperand(array, class_offset));
   3072           codegen_->MaybeRecordImplicitNullCheck(instruction);
   3073         }
   3074         GetAssembler()->MaybeUnpoisonHeapReference(temp);
   3075 
   3076         // /* HeapReference<Class> */ temp = temp->component_type_
   3077         __ Ldr(temp, HeapOperand(temp, component_offset));
   3078         // /* HeapReference<Class> */ temp2 = value->klass_
   3079         __ Ldr(temp2, HeapOperand(Register(value), class_offset));
   3080         // If heap poisoning is enabled, no need to unpoison `temp`
   3081         // nor `temp2`, as we are comparing two poisoned references.
   3082         __ Cmp(temp, temp2);
   3083         temps.Release(temp2);
   3084 
   3085         if (instruction->StaticTypeOfArrayIsObjectArray()) {
   3086           vixl::aarch64::Label do_put;
   3087           __ B(eq, &do_put);
   3088           // If heap poisoning is enabled, the `temp` reference has
   3089           // not been unpoisoned yet; unpoison it now.
   3090           GetAssembler()->MaybeUnpoisonHeapReference(temp);
   3091 
   3092           // /* HeapReference<Class> */ temp = temp->super_class_
   3093           __ Ldr(temp, HeapOperand(temp, super_offset));
   3094           // If heap poisoning is enabled, no need to unpoison
   3095           // `temp`, as we are comparing against null below.
   3096           __ Cbnz(temp, slow_path->GetEntryLabel());
   3097           __ Bind(&do_put);
   3098         } else {
   3099           __ B(ne, slow_path->GetEntryLabel());
   3100         }
   3101       }
   3102 
   3103       if (kPoisonHeapReferences) {
   3104         Register temp2 = temps.AcquireSameSizeAs(array);
    3105         DCHECK(value.IsW());
   3106         __ Mov(temp2, value.W());
   3107         GetAssembler()->PoisonHeapReference(temp2);
   3108         source = temp2;
   3109       }
   3110 
   3111       if (!index.IsConstant()) {
   3112         __ Add(temp, array, offset);
   3113       } else {
    3114         // We no longer need `temp` here, so release it: the store below may need a
    3115         // scratch register (if the constant index makes the offset too large) and the
    3116         // poisoned `source` could be using the other scratch register.
   3117         temps.Release(temp);
   3118       }
   3119       {
   3120         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
   3121         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   3122         __ Str(source, destination);
   3123 
   3124         if (!may_need_runtime_call_for_type_check) {
   3125           codegen_->MaybeRecordImplicitNullCheck(instruction);
   3126         }
   3127       }
   3128     }
   3129 
   3130     codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
   3131 
   3132     if (done.IsLinked()) {
   3133       __ Bind(&done);
   3134     }
   3135 
   3136     if (slow_path != nullptr) {
   3137       __ Bind(slow_path->GetExitLabel());
   3138     }
   3139   }
   3140 }
   3141 
   3142 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
   3143   RegisterSet caller_saves = RegisterSet::Empty();
   3144   InvokeRuntimeCallingConvention calling_convention;
   3145   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
   3146   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
   3147   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
   3148   locations->SetInAt(0, Location::RequiresRegister());
   3149   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   3150 }
   3151 
   3152 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
   3153   BoundsCheckSlowPathARM64* slow_path =
   3154       new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
   3155   codegen_->AddSlowPath(slow_path);
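           // The unsigned comparison (HS) also catches negative indices: reinterpreted as
           // unsigned they are larger than any valid array length, so they take the slow path too.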
   3156   __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
   3157   __ B(slow_path->GetEntryLabel(), hs);
   3158 }
   3159 
   3160 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
   3161   LocationSummary* locations =
   3162       new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
   3163   locations->SetInAt(0, Location::RequiresRegister());
   3164   if (check->HasUses()) {
   3165     locations->SetOut(Location::SameAsFirstInput());
   3166   }
   3167 }
   3168 
   3169 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
   3170   // We assume the class is not null.
   3171   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
   3172       check->GetLoadClass(), check, check->GetDexPc(), true);
   3173   codegen_->AddSlowPath(slow_path);
   3174   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
   3175 }
   3176 
   3177 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
   3178   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
   3179       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
   3180 }
   3181 
   3182 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
   3183   FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
   3184   Location rhs_loc = instruction->GetLocations()->InAt(1);
   3185   if (rhs_loc.IsConstant()) {
   3186     // 0.0 is the only immediate that can be encoded directly in
   3187     // an FCMP instruction.
   3188     //
   3189     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
   3190     // specify that in a floating-point comparison, positive zero
   3191     // and negative zero are considered equal, so we can use the
   3192     // literal 0.0 for both cases here.
   3193     //
    3194     // Note however that some methods (Float.equals, Float.compare,
    3195     // Float.compareTo, Double.equals, Double.compare,
   3196     // Double.compareTo, Math.max, Math.min, StrictMath.max,
   3197     // StrictMath.min) consider 0.0 to be (strictly) greater than
   3198     // -0.0. So if we ever translate calls to these methods into a
   3199     // HCompare instruction, we must handle the -0.0 case with
   3200     // care here.
   3201     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
   3202     __ Fcmp(lhs_reg, 0.0);
   3203   } else {
   3204     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
   3205   }
   3206 }
   3207 
   3208 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
   3209   LocationSummary* locations =
   3210       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   3211   Primitive::Type in_type = compare->InputAt(0)->GetType();
   3212   switch (in_type) {
   3213     case Primitive::kPrimBoolean:
   3214     case Primitive::kPrimByte:
   3215     case Primitive::kPrimShort:
   3216     case Primitive::kPrimChar:
   3217     case Primitive::kPrimInt:
   3218     case Primitive::kPrimLong: {
   3219       locations->SetInAt(0, Location::RequiresRegister());
   3220       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
   3221       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3222       break;
   3223     }
   3224     case Primitive::kPrimFloat:
   3225     case Primitive::kPrimDouble: {
   3226       locations->SetInAt(0, Location::RequiresFpuRegister());
   3227       locations->SetInAt(1,
   3228                          IsFloatingPointZeroConstant(compare->InputAt(1))
   3229                              ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
   3230                              : Location::RequiresFpuRegister());
   3231       locations->SetOut(Location::RequiresRegister());
   3232       break;
   3233     }
   3234     default:
   3235       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
   3236   }
   3237 }
   3238 
   3239 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
   3240   Primitive::Type in_type = compare->InputAt(0)->GetType();
   3241 
   3242   //  0 if: left == right
   3243   //  1 if: left  > right
   3244   // -1 if: left  < right
   3245   switch (in_type) {
   3246     case Primitive::kPrimBoolean:
   3247     case Primitive::kPrimByte:
   3248     case Primitive::kPrimShort:
   3249     case Primitive::kPrimChar:
   3250     case Primitive::kPrimInt:
   3251     case Primitive::kPrimLong: {
   3252       Register result = OutputRegister(compare);
   3253       Register left = InputRegisterAt(compare, 0);
   3254       Operand right = InputOperandAt(compare, 1);
   3255       __ Cmp(left, right);
   3256       __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
   3257       __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
   3258       break;
   3259     }
   3260     case Primitive::kPrimFloat:
   3261     case Primitive::kPrimDouble: {
   3262       Register result = OutputRegister(compare);
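               // For unordered (NaN) inputs the gt/lt bias of the HCompare determines whether the
               // result ends up as +1 or -1.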
   3263       GenerateFcmp(compare);
   3264       __ Cset(result, ne);
   3265       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
   3266       break;
   3267     }
   3268     default:
   3269       LOG(FATAL) << "Unimplemented compare type " << in_type;
   3270   }
   3271 }
   3272 
   3273 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
   3274   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   3275 
   3276   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
   3277     locations->SetInAt(0, Location::RequiresFpuRegister());
   3278     locations->SetInAt(1,
   3279                        IsFloatingPointZeroConstant(instruction->InputAt(1))
   3280                            ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
   3281                            : Location::RequiresFpuRegister());
   3282   } else {
   3283     // Integer cases.
   3284     locations->SetInAt(0, Location::RequiresRegister());
   3285     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   3286   }
   3287 
   3288   if (!instruction->IsEmittedAtUseSite()) {
   3289     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3290   }
   3291 }
   3292 
   3293 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
   3294   if (instruction->IsEmittedAtUseSite()) {
   3295     return;
   3296   }
   3297 
   3298   LocationSummary* locations = instruction->GetLocations();
   3299   Register res = RegisterFrom(locations->Out(), instruction->GetType());
   3300   IfCondition if_cond = instruction->GetCondition();
   3301 
   3302   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
   3303     GenerateFcmp(instruction);
   3304     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
   3305   } else {
   3306     // Integer cases.
   3307     Register lhs = InputRegisterAt(instruction, 0);
   3308     Operand rhs = InputOperandAt(instruction, 1);
   3309     __ Cmp(lhs, rhs);
   3310     __ Cset(res, ARM64Condition(if_cond));
   3311   }
   3312 }
   3313 
   3314 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
   3315   M(Equal)                                                                               \
   3316   M(NotEqual)                                                                            \
   3317   M(LessThan)                                                                            \
   3318   M(LessThanOrEqual)                                                                     \
   3319   M(GreaterThan)                                                                         \
   3320   M(GreaterThanOrEqual)                                                                  \
   3321   M(Below)                                                                               \
   3322   M(BelowOrEqual)                                                                        \
   3323   M(Above)                                                                               \
   3324   M(AboveOrEqual)
   3325 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
   3326 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
   3327 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
   3328 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
   3329 #undef DEFINE_CONDITION_VISITORS
   3330 #undef FOR_EACH_CONDITION_INSTRUCTION
   3331 
   3332 void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
   3333   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3334 
   3335   LocationSummary* locations = instruction->GetLocations();
   3336   Location second = locations->InAt(1);
   3337   DCHECK(second.IsConstant());
   3338 
   3339   Register out = OutputRegister(instruction);
   3340   Register dividend = InputRegisterAt(instruction, 0);
   3341   int64_t imm = Int64FromConstant(second.GetConstant());
   3342   DCHECK(imm == 1 || imm == -1);
   3343 
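           // x % (+/-1) is always 0, x / 1 is x, and x / -1 is -x.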
   3344   if (instruction->IsRem()) {
   3345     __ Mov(out, 0);
   3346   } else {
   3347     if (imm == 1) {
   3348       __ Mov(out, dividend);
   3349     } else {
   3350       __ Neg(out, dividend);
   3351     }
   3352   }
   3353 }
   3354 
   3355 void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
   3356   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3357 
   3358   LocationSummary* locations = instruction->GetLocations();
   3359   Location second = locations->InAt(1);
   3360   DCHECK(second.IsConstant());
   3361 
   3362   Register out = OutputRegister(instruction);
   3363   Register dividend = InputRegisterAt(instruction, 0);
   3364   int64_t imm = Int64FromConstant(second.GetConstant());
   3365   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
   3366   int ctz_imm = CTZ(abs_imm);
   3367 
   3368   UseScratchRegisterScope temps(GetVIXLAssembler());
   3369   Register temp = temps.AcquireSameSizeAs(out);
   3370 
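           // Division by +/-2^n: bias negative dividends by (2^n - 1) so the arithmetic shift
           // rounds toward zero, then negate if the divisor is negative. The remainder variant
           // applies the same bias, masks with (2^n - 1) and removes the bias again.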
   3371   if (instruction->IsDiv()) {
   3372     __ Add(temp, dividend, abs_imm - 1);
   3373     __ Cmp(dividend, 0);
   3374     __ Csel(out, temp, dividend, lt);
   3375     if (imm > 0) {
   3376       __ Asr(out, out, ctz_imm);
   3377     } else {
   3378       __ Neg(out, Operand(out, ASR, ctz_imm));
   3379     }
   3380   } else {
   3381     int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64;
   3382     __ Asr(temp, dividend, bits - 1);
   3383     __ Lsr(temp, temp, bits - ctz_imm);
   3384     __ Add(out, dividend, temp);
   3385     __ And(out, out, abs_imm - 1);
   3386     __ Sub(out, out, temp);
   3387   }
   3388 }
   3389 
   3390 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
   3391   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3392 
   3393   LocationSummary* locations = instruction->GetLocations();
   3394   Location second = locations->InAt(1);
   3395   DCHECK(second.IsConstant());
   3396 
   3397   Register out = OutputRegister(instruction);
   3398   Register dividend = InputRegisterAt(instruction, 0);
   3399   int64_t imm = Int64FromConstant(second.GetConstant());
   3400 
   3401   Primitive::Type type = instruction->GetResultType();
   3402   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
   3403 
   3404   int64_t magic;
   3405   int shift;
   3406   CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift);
   3407 
   3408   UseScratchRegisterScope temps(GetVIXLAssembler());
   3409   Register temp = temps.AcquireSameSizeAs(out);
   3410 
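          // The division is strength-reduced to a multiply-high plus shifts (the "magic
          // number" scheme of Hacker's Delight): roughly out = (dividend * magic) >>
          // (word size + shift), with a correction when magic and imm have opposite signs
          // and a final +1 adjustment when the intermediate result is negative. For
          // illustration only (the real values come from CalculateMagicAndShiftForDivRem
          // above), a 32-bit division by 7 is commonly encoded as magic = 0x92492493, shift = 2.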
   3411   // temp = get_high(dividend * magic)
   3412   __ Mov(temp, magic);
   3413   if (type == Primitive::kPrimLong) {
   3414     __ Smulh(temp, dividend, temp);
   3415   } else {
   3416     __ Smull(temp.X(), dividend, temp);
   3417     __ Lsr(temp.X(), temp.X(), 32);
   3418   }
   3419 
   3420   if (imm > 0 && magic < 0) {
   3421     __ Add(temp, temp, dividend);
   3422   } else if (imm < 0 && magic > 0) {
   3423     __ Sub(temp, temp, dividend);
   3424   }
   3425 
   3426   if (shift != 0) {
   3427     __ Asr(temp, temp, shift);
   3428   }
   3429 
   3430   if (instruction->IsDiv()) {
   3431     __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
   3432   } else {
   3433     __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
   3434     // TODO: Strength reduction for msub.
   3435     Register temp_imm = temps.AcquireSameSizeAs(out);
   3436     __ Mov(temp_imm, imm);
   3437     __ Msub(out, temp, temp_imm, dividend);
   3438   }
   3439 }
   3440 
   3441 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   3442   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3443   Primitive::Type type = instruction->GetResultType();
   3444   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
   3445 
   3446   LocationSummary* locations = instruction->GetLocations();
   3447   Register out = OutputRegister(instruction);
   3448   Location second = locations->InAt(1);
   3449 
   3450   if (second.IsConstant()) {
   3451     int64_t imm = Int64FromConstant(second.GetConstant());
   3452 
   3453     if (imm == 0) {
   3454       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
   3455     } else if (imm == 1 || imm == -1) {
   3456       DivRemOneOrMinusOne(instruction);
   3457     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
   3458       DivRemByPowerOfTwo(instruction);
   3459     } else {
   3460       DCHECK(imm <= -2 || imm >= 2);
   3461       GenerateDivRemWithAnyConstant(instruction);
   3462     }
   3463   } else {
   3464     Register dividend = InputRegisterAt(instruction, 0);
   3465     Register divisor = InputRegisterAt(instruction, 1);
   3466     if (instruction->IsDiv()) {
   3467       __ Sdiv(out, dividend, divisor);
   3468     } else {
   3469       UseScratchRegisterScope temps(GetVIXLAssembler());
   3470       Register temp = temps.AcquireSameSizeAs(out);
   3471       __ Sdiv(temp, dividend, divisor);
   3472       __ Msub(out, temp, divisor, dividend);
   3473     }
   3474   }
   3475 }
   3476 
   3477 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
   3478   LocationSummary* locations =
   3479       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
   3480   switch (div->GetResultType()) {
   3481     case Primitive::kPrimInt:
   3482     case Primitive::kPrimLong:
   3483       locations->SetInAt(0, Location::RequiresRegister());
   3484       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
   3485       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3486       break;
   3487 
   3488     case Primitive::kPrimFloat:
   3489     case Primitive::kPrimDouble:
   3490       locations->SetInAt(0, Location::RequiresFpuRegister());
   3491       locations->SetInAt(1, Location::RequiresFpuRegister());
   3492       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   3493       break;
   3494 
   3495     default:
   3496       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   3497   }
   3498 }
   3499 
   3500 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
   3501   Primitive::Type type = div->GetResultType();
   3502   switch (type) {
   3503     case Primitive::kPrimInt:
   3504     case Primitive::kPrimLong:
   3505       GenerateDivRemIntegral(div);
   3506       break;
   3507 
   3508     case Primitive::kPrimFloat:
   3509     case Primitive::kPrimDouble:
   3510       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
   3511       break;
   3512 
   3513     default:
   3514       LOG(FATAL) << "Unexpected div type " << type;
   3515   }
   3516 }
   3517 
   3518 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3519   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   3520   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   3521 }
   3522 
   3523 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3524   SlowPathCodeARM64* slow_path =
   3525       new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction);
   3526   codegen_->AddSlowPath(slow_path);
   3527   Location value = instruction->GetLocations()->InAt(0);
   3528 
   3529   Primitive::Type type = instruction->GetType();
   3530 
   3531   if (!Primitive::IsIntegralType(type)) {
   3532     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
   3533     return;
   3534   }
   3535 
   3536   if (value.IsConstant()) {
   3537     int64_t divisor = Int64ConstantFrom(value);
   3538     if (divisor == 0) {
   3539       __ B(slow_path->GetEntryLabel());
   3540     } else {
   3541       // A division by a non-zero constant is valid. We don't need to perform
   3542       // any check, so simply fall through.
   3543     }
   3544   } else {
   3545     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
   3546   }
   3547 }
   3548 
   3549 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
   3550   LocationSummary* locations =
   3551       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   3552   locations->SetOut(Location::ConstantLocation(constant));
   3553 }
   3554 
   3555 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
   3556     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   3557   // Will be generated at use site.
   3558 }
   3559 
   3560 void LocationsBuilderARM64::VisitExit(HExit* exit) {
   3561   exit->SetLocations(nullptr);
   3562 }
   3563 
   3564 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
   3565 }
   3566 
   3567 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
   3568   LocationSummary* locations =
   3569       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   3570   locations->SetOut(Location::ConstantLocation(constant));
   3571 }
   3572 
   3573 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   3574   // Will be generated at use site.
   3575 }
   3576 
   3577 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   3578   DCHECK(!successor->IsExitBlock());
   3579   HBasicBlock* block = got->GetBlock();
   3580   HInstruction* previous = got->GetPrevious();
   3581   HLoopInformation* info = block->GetLoopInformation();
   3582 
   3583   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
   3584     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
   3585     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
   3586     return;
   3587   }
   3588   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
   3589     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
   3590   }
   3591   if (!codegen_->GoesToNextBlock(block, successor)) {
   3592     __ B(codegen_->GetLabelOf(successor));
   3593   }
   3594 }
   3595 
   3596 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
   3597   got->SetLocations(nullptr);
   3598 }
   3599 
   3600 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
   3601   HandleGoto(got, got->GetSuccessor());
   3602 }
   3603 
   3604 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
   3605   try_boundary->SetLocations(nullptr);
   3606 }
   3607 
   3608 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
   3609   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
   3610   if (!successor->IsExitBlock()) {
   3611     HandleGoto(try_boundary, successor);
   3612   }
   3613 }
   3614 
   3615 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
   3616                                                           size_t condition_input_index,
   3617                                                           vixl::aarch64::Label* true_target,
   3618                                                           vixl::aarch64::Label* false_target) {
   3619   HInstruction* cond = instruction->InputAt(condition_input_index);
   3620 
   3621   if (true_target == nullptr && false_target == nullptr) {
   3622     // Nothing to do. The code always falls through.
   3623     return;
   3624   } else if (cond->IsIntConstant()) {
   3625     // Constant condition, statically compared against "true" (integer value 1).
   3626     if (cond->AsIntConstant()->IsTrue()) {
   3627       if (true_target != nullptr) {
   3628         __ B(true_target);
   3629       }
   3630     } else {
   3631       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
   3632       if (false_target != nullptr) {
   3633         __ B(false_target);
   3634       }
   3635     }
   3636     return;
   3637   }
   3638 
   3639   // The following code generates these patterns:
   3640   //  (1) true_target == nullptr && false_target != nullptr
   3641   //        - opposite condition true => branch to false_target
   3642   //  (2) true_target != nullptr && false_target == nullptr
   3643   //        - condition true => branch to true_target
   3644   //  (3) true_target != nullptr && false_target != nullptr
   3645   //        - condition true => branch to true_target
   3646   //        - branch to false_target
   3647   if (IsBooleanValueOrMaterializedCondition(cond)) {
   3648     // The condition instruction has been materialized, compare the output to 0.
   3649     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
   3650     DCHECK(cond_val.IsRegister());
   3651     if (true_target == nullptr) {
   3652       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
   3653     } else {
   3654       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
   3655     }
   3656   } else {
   3657     // The condition instruction has not been materialized, use its inputs as
   3658     // the comparison and its condition as the branch condition.
   3659     HCondition* condition = cond->AsCondition();
   3660 
   3661     Primitive::Type type = condition->InputAt(0)->GetType();
   3662     if (Primitive::IsFloatingPointType(type)) {
   3663       GenerateFcmp(condition);
   3664       if (true_target == nullptr) {
   3665         IfCondition opposite_condition = condition->GetOppositeCondition();
   3666         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
   3667       } else {
   3668         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
   3669       }
   3670     } else {
   3671       // Integer cases.
   3672       Register lhs = InputRegisterAt(condition, 0);
   3673       Operand rhs = InputOperandAt(condition, 1);
   3674 
   3675       Condition arm64_cond;
   3676       vixl::aarch64::Label* non_fallthrough_target;
   3677       if (true_target == nullptr) {
   3678         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
   3679         non_fallthrough_target = false_target;
   3680       } else {
   3681         arm64_cond = ARM64Condition(condition->GetCondition());
   3682         non_fallthrough_target = true_target;
   3683       }
   3684 
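              // When the right-hand side is the constant zero, the flag-setting Cmp can be
              // avoided: eq/ne map directly to Cbz/Cbnz, and lt/ge depend only on the sign
              // bit, so Tbnz/Tbz on the most significant bit yields the same branch.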
   3685       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
   3686           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
   3687         switch (arm64_cond) {
   3688           case eq:
   3689             __ Cbz(lhs, non_fallthrough_target);
   3690             break;
   3691           case ne:
   3692             __ Cbnz(lhs, non_fallthrough_target);
   3693             break;
   3694           case lt:
   3695             // Test the sign bit and branch accordingly.
   3696             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
   3697             break;
   3698           case ge:
   3699             // Test the sign bit and branch accordingly.
   3700             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
   3701             break;
   3702           default:
   3703             // Without the `static_cast` the compiler throws an error for
   3704             // `-Werror=sign-promo`.
   3705             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
   3706         }
   3707       } else {
   3708         __ Cmp(lhs, rhs);
   3709         __ B(arm64_cond, non_fallthrough_target);
   3710       }
   3711     }
   3712   }
   3713 
   3714   // If neither branch falls through (case 3), the conditional branch to `true_target`
   3715   // was already emitted above (as in case 2), so emit the jump to `false_target` here.
   3716   if (true_target != nullptr && false_target != nullptr) {
   3717     __ B(false_target);
   3718   }
   3719 }
   3720 
   3721 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
   3722   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
   3723   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
   3724     locations->SetInAt(0, Location::RequiresRegister());
   3725   }
   3726 }
   3727 
   3728 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
   3729   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   3730   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
   3731   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
   3732   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
   3733     true_target = nullptr;
   3734   }
   3735   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
   3736   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
   3737     false_target = nullptr;
   3738   }
   3739   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
   3740 }
   3741 
   3742 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
   3743   LocationSummary* locations = new (GetGraph()->GetArena())
   3744       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   3745   InvokeRuntimeCallingConvention calling_convention;
   3746   RegisterSet caller_saves = RegisterSet::Empty();
   3747   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
   3748   locations->SetCustomSlowPathCallerSaves(caller_saves);
   3749   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
   3750     locations->SetInAt(0, Location::RequiresRegister());
   3751   }
   3752 }
   3753 
   3754 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
   3755   SlowPathCodeARM64* slow_path =
   3756       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
   3757   GenerateTestAndBranch(deoptimize,
   3758                         /* condition_input_index */ 0,
   3759                         slow_path->GetEntryLabel(),
   3760                         /* false_target */ nullptr);
   3761 }
   3762 
   3763 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
   3764   LocationSummary* locations = new (GetGraph()->GetArena())
   3765       LocationSummary(flag, LocationSummary::kNoCall);
   3766   locations->SetOut(Location::RequiresRegister());
   3767 }
   3768 
   3769 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
   3770   __ Ldr(OutputRegister(flag),
   3771          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
   3772 }
   3773 
   3774 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
   3775   return condition->IsCondition() &&
   3776          Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
   3777 }
   3778 
   3779 static inline Condition GetConditionForSelect(HCondition* condition) {
   3780   IfCondition cond = condition->AsCondition()->GetCondition();
   3781   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
   3782                                                      : ARM64Condition(cond);
   3783 }
   3784 
   3785 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
   3786   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   3787   if (Primitive::IsFloatingPointType(select->GetType())) {
   3788     locations->SetInAt(0, Location::RequiresFpuRegister());
   3789     locations->SetInAt(1, Location::RequiresFpuRegister());
   3790     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   3791   } else {
   3792     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
   3793     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
   3794     bool is_true_value_constant = cst_true_value != nullptr;
   3795     bool is_false_value_constant = cst_false_value != nullptr;
   3796     // Ask VIXL whether we should synthesize constants in registers.
   3797     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
   3798     Operand true_op = is_true_value_constant ?
   3799         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
   3800     Operand false_op = is_false_value_constant ?
   3801         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
   3802     bool true_value_in_register = false;
   3803     bool false_value_in_register = false;
   3804     MacroAssembler::GetCselSynthesisInformation(
   3805         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
   3806     true_value_in_register |= !is_true_value_constant;
   3807     false_value_in_register |= !is_false_value_constant;
   3808 
   3809     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
   3810                                                  : Location::ConstantLocation(cst_true_value));
   3811     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
   3812                                                   : Location::ConstantLocation(cst_false_value));
   3813     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3814   }
   3815 
   3816   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
   3817     locations->SetInAt(2, Location::RequiresRegister());
   3818   }
   3819 }
   3820 
   3821 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
   3822   HInstruction* cond = select->GetCondition();
   3823   Condition csel_cond;
   3824 
   3825   if (IsBooleanValueOrMaterializedCondition(cond)) {
   3826     if (cond->IsCondition() && cond->GetNext() == select) {
   3827       // Use the condition flags set by the previous instruction.
   3828       csel_cond = GetConditionForSelect(cond->AsCondition());
   3829     } else {
   3830       __ Cmp(InputRegisterAt(select, 2), 0);
   3831       csel_cond = ne;
   3832     }
   3833   } else if (IsConditionOnFloatingPointValues(cond)) {
   3834     GenerateFcmp(cond);
   3835     csel_cond = GetConditionForSelect(cond->AsCondition());
   3836   } else {
   3837     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
   3838     csel_cond = GetConditionForSelect(cond->AsCondition());
   3839   }
   3840 
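          // HSelect's inputs are (false value, true value, condition), matching the locations
          // set up above, so input 1 is selected when `csel_cond` holds and input 0 otherwise.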
   3841   if (Primitive::IsFloatingPointType(select->GetType())) {
   3842     __ Fcsel(OutputFPRegister(select),
   3843              InputFPRegisterAt(select, 1),
   3844              InputFPRegisterAt(select, 0),
   3845              csel_cond);
   3846   } else {
   3847     __ Csel(OutputRegister(select),
   3848             InputOperandAt(select, 1),
   3849             InputOperandAt(select, 0),
   3850             csel_cond);
   3851   }
   3852 }
   3853 
   3854 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   3855   new (GetGraph()->GetArena()) LocationSummary(info);
   3856 }
   3857 
   3858 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
   3859   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
   3860 }
   3861 
   3862 void CodeGeneratorARM64::GenerateNop() {
   3863   __ Nop();
   3864 }
   3865 
   3866 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   3867   HandleFieldGet(instruction, instruction->GetFieldInfo());
   3868 }
   3869 
   3870 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   3871   HandleFieldGet(instruction, instruction->GetFieldInfo());
   3872 }
   3873 
   3874 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   3875   HandleFieldSet(instruction);
   3876 }
   3877 
   3878 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   3879   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   3880 }
   3881 
   3882 // Temp is used for read barrier.
   3883 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
   3884   if (kEmitCompilerReadBarrier &&
   3885       (kUseBakerReadBarrier ||
   3886           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   3887           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   3888           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
   3889     return 1;
   3890   }
   3891   return 0;
   3892 }
   3893 
   3894 // The interface case has 3 temps: one for holding the number of interfaces, one for the
   3895 // current interface pointer, and one for loading the current interface.
   3896 // The other checks have one temp for loading the object's class.
   3897 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
   3898   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
   3899     return 3;
   3900   }
   3901   return 1 + NumberOfInstanceOfTemps(type_check_kind);
   3902 }
   3903 
   3904 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   3905   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   3906   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   3907   bool baker_read_barrier_slow_path = false;
   3908   switch (type_check_kind) {
   3909     case TypeCheckKind::kExactCheck:
   3910     case TypeCheckKind::kAbstractClassCheck:
   3911     case TypeCheckKind::kClassHierarchyCheck:
   3912     case TypeCheckKind::kArrayObjectCheck:
   3913       call_kind =
   3914           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
   3915       baker_read_barrier_slow_path = kUseBakerReadBarrier;
   3916       break;
   3917     case TypeCheckKind::kArrayCheck:
   3918     case TypeCheckKind::kUnresolvedCheck:
   3919     case TypeCheckKind::kInterfaceCheck:
   3920       call_kind = LocationSummary::kCallOnSlowPath;
   3921       break;
   3922   }
   3923 
   3924   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   3925   if (baker_read_barrier_slow_path) {
   3926     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   3927   }
   3928   locations->SetInAt(0, Location::RequiresRegister());
   3929   locations->SetInAt(1, Location::RequiresRegister());
   3930   // The "out" register is used as a temporary, so it overlaps with the inputs.
   3931   // Note that TypeCheckSlowPathARM64 uses this register too.
   3932   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   3933   // Add temps if necessary for read barriers.
   3934   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
   3935 }
   3936 
   3937 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
   3938   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   3939   LocationSummary* locations = instruction->GetLocations();
   3940   Location obj_loc = locations->InAt(0);
   3941   Register obj = InputRegisterAt(instruction, 0);
   3942   Register cls = InputRegisterAt(instruction, 1);
   3943   Location out_loc = locations->Out();
   3944   Register out = OutputRegister(instruction);
   3945   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
   3946   DCHECK_LE(num_temps, 1u);
   3947   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   3948   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   3949   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   3950   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   3951   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   3952 
   3953   vixl::aarch64::Label done, zero;
   3954   SlowPathCodeARM64* slow_path = nullptr;
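          // `out` doubles as the register used to walk the class hierarchy and as the final
          // boolean result; the `zero` label below materializes the false result.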
   3955 
   3956   // Return 0 if `obj` is null.
   3957   // Avoid null check if we know `obj` is not null.
   3958   if (instruction->MustDoNullCheck()) {
   3959     __ Cbz(obj, &zero);
   3960   }
   3961 
   3962   switch (type_check_kind) {
   3963     case TypeCheckKind::kExactCheck: {
   3964       // /* HeapReference<Class> */ out = obj->klass_
   3965       GenerateReferenceLoadTwoRegisters(instruction,
   3966                                         out_loc,
   3967                                         obj_loc,
   3968                                         class_offset,
   3969                                         maybe_temp_loc,
   3970                                         kCompilerReadBarrierOption);
   3971       __ Cmp(out, cls);
   3972       __ Cset(out, eq);
   3973       if (zero.IsLinked()) {
   3974         __ B(&done);
   3975       }
   3976       break;
   3977     }
   3978 
   3979     case TypeCheckKind::kAbstractClassCheck: {
   3980       // /* HeapReference<Class> */ out = obj->klass_
   3981       GenerateReferenceLoadTwoRegisters(instruction,
   3982                                         out_loc,
   3983                                         obj_loc,
   3984                                         class_offset,
   3985                                         maybe_temp_loc,
   3986                                         kCompilerReadBarrierOption);
   3987       // If the class is abstract, we eagerly fetch the super class of the
   3988       // object to avoid doing a comparison we know will fail.
   3989       vixl::aarch64::Label loop, success;
   3990       __ Bind(&loop);
   3991       // /* HeapReference<Class> */ out = out->super_class_
   3992       GenerateReferenceLoadOneRegister(instruction,
   3993                                        out_loc,
   3994                                        super_offset,
   3995                                        maybe_temp_loc,
   3996                                        kCompilerReadBarrierOption);
   3997       // If `out` is null, we use it for the result, and jump to `done`.
   3998       __ Cbz(out, &done);
   3999       __ Cmp(out, cls);
   4000       __ B(ne, &loop);
   4001       __ Mov(out, 1);
   4002       if (zero.IsLinked()) {
   4003         __ B(&done);
   4004       }
   4005       break;
   4006     }
   4007 
   4008     case TypeCheckKind::kClassHierarchyCheck: {
   4009       // /* HeapReference<Class> */ out = obj->klass_
   4010       GenerateReferenceLoadTwoRegisters(instruction,
   4011                                         out_loc,
   4012                                         obj_loc,
   4013                                         class_offset,
   4014                                         maybe_temp_loc,
   4015                                         kCompilerReadBarrierOption);
   4016       // Walk over the class hierarchy to find a match.
   4017       vixl::aarch64::Label loop, success;
   4018       __ Bind(&loop);
   4019       __ Cmp(out, cls);
   4020       __ B(eq, &success);
   4021       // /* HeapReference<Class> */ out = out->super_class_
   4022       GenerateReferenceLoadOneRegister(instruction,
   4023                                        out_loc,
   4024                                        super_offset,
   4025                                        maybe_temp_loc,
   4026                                        kCompilerReadBarrierOption);
   4027       __ Cbnz(out, &loop);
   4028       // If `out` is null, we use it for the result, and jump to `done`.
   4029       __ B(&done);
   4030       __ Bind(&success);
   4031       __ Mov(out, 1);
   4032       if (zero.IsLinked()) {
   4033         __ B(&done);
   4034       }
   4035       break;
   4036     }
   4037 
   4038     case TypeCheckKind::kArrayObjectCheck: {
   4039       // /* HeapReference<Class> */ out = obj->klass_
   4040       GenerateReferenceLoadTwoRegisters(instruction,
   4041                                         out_loc,
   4042                                         obj_loc,
   4043                                         class_offset,
   4044                                         maybe_temp_loc,
   4045                                         kCompilerReadBarrierOption);
   4046       // Do an exact check.
   4047       vixl::aarch64::Label exact_check;
   4048       __ Cmp(out, cls);
   4049       __ B(eq, &exact_check);
   4050       // Otherwise, we need to check that the object's class is a non-primitive array.
   4051       // /* HeapReference<Class> */ out = out->component_type_
   4052       GenerateReferenceLoadOneRegister(instruction,
   4053                                        out_loc,
   4054                                        component_offset,
   4055                                        maybe_temp_loc,
   4056                                        kCompilerReadBarrierOption);
   4057       // If `out` is null, we use it for the result, and jump to `done`.
   4058       __ Cbz(out, &done);
   4059       __ Ldrh(out, HeapOperand(out, primitive_offset));
   4060       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   4061       __ Cbnz(out, &zero);
   4062       __ Bind(&exact_check);
   4063       __ Mov(out, 1);
   4064       __ B(&done);
   4065       break;
   4066     }
   4067 
   4068     case TypeCheckKind::kArrayCheck: {
   4069       // No read barrier since the slow path will retry upon failure.
   4070       // /* HeapReference<Class> */ out = obj->klass_
   4071       GenerateReferenceLoadTwoRegisters(instruction,
   4072                                         out_loc,
   4073                                         obj_loc,
   4074                                         class_offset,
   4075                                         maybe_temp_loc,
   4076                                         kWithoutReadBarrier);
   4077       __ Cmp(out, cls);
   4078       DCHECK(locations->OnlyCallsOnSlowPath());
   4079       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
   4080                                                                       /* is_fatal */ false);
   4081       codegen_->AddSlowPath(slow_path);
   4082       __ B(ne, slow_path->GetEntryLabel());
   4083       __ Mov(out, 1);
   4084       if (zero.IsLinked()) {
   4085         __ B(&done);
   4086       }
   4087       break;
   4088     }
   4089 
   4090     case TypeCheckKind::kUnresolvedCheck:
   4091     case TypeCheckKind::kInterfaceCheck: {
   4092       // Note that we indeed only call on slow path, but we always go
   4093       // into the slow path for the unresolved and interface check
   4094       // cases.
   4095       //
   4096       // We cannot directly call the InstanceofNonTrivial runtime
   4097       // entry point without resorting to a type checking slow path
   4098       // here (i.e. by calling InvokeRuntime directly), as it would
   4099       // require to assign fixed registers for the inputs of this
   4100       // HInstanceOf instruction (following the runtime calling
   4101       // convention), which might be cluttered by the potential first
   4102       // read barrier emission at the beginning of this method.
   4103       //
   4104       // TODO: Introduce a new runtime entry point taking the object
   4105       // to test (instead of its class) as argument, and let it deal
   4106       // with the read barrier issues. This will let us refactor this
   4107       // case of the `switch` code as it was previously (with a direct
   4108       // call to the runtime not using a type checking slow path).
   4109       // This should also be beneficial for the other cases above.
   4110       DCHECK(locations->OnlyCallsOnSlowPath());
   4111       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
   4112                                                                       /* is_fatal */ false);
   4113       codegen_->AddSlowPath(slow_path);
   4114       __ B(slow_path->GetEntryLabel());
   4115       if (zero.IsLinked()) {
   4116         __ B(&done);
   4117       }
   4118       break;
   4119     }
   4120   }
   4121 
   4122   if (zero.IsLinked()) {
   4123     __ Bind(&zero);
   4124     __ Mov(out, 0);
   4125   }
   4126 
   4127   if (done.IsLinked()) {
   4128     __ Bind(&done);
   4129   }
   4130 
   4131   if (slow_path != nullptr) {
   4132     __ Bind(slow_path->GetExitLabel());
   4133   }
   4134 }
   4135 
   4136 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
   4137   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   4138   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
   4139 
   4140   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   4141   switch (type_check_kind) {
   4142     case TypeCheckKind::kExactCheck:
   4143     case TypeCheckKind::kAbstractClassCheck:
   4144     case TypeCheckKind::kClassHierarchyCheck:
   4145     case TypeCheckKind::kArrayObjectCheck:
   4146       call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
   4147           LocationSummary::kCallOnSlowPath :
   4148           LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
   4149       break;
   4150     case TypeCheckKind::kArrayCheck:
   4151     case TypeCheckKind::kUnresolvedCheck:
   4152     case TypeCheckKind::kInterfaceCheck:
   4153       call_kind = LocationSummary::kCallOnSlowPath;
   4154       break;
   4155   }
   4156 
   4157   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   4158   locations->SetInAt(0, Location::RequiresRegister());
   4159   locations->SetInAt(1, Location::RequiresRegister());
   4160   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
   4161   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
   4162 }
   4163 
   4164 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
   4165   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   4166   LocationSummary* locations = instruction->GetLocations();
   4167   Location obj_loc = locations->InAt(0);
   4168   Register obj = InputRegisterAt(instruction, 0);
   4169   Register cls = InputRegisterAt(instruction, 1);
   4170   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
   4171   DCHECK_GE(num_temps, 1u);
   4172   DCHECK_LE(num_temps, 3u);
   4173   Location temp_loc = locations->GetTemp(0);
   4174   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
   4175   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
   4176   Register temp = WRegisterFrom(temp_loc);
   4177   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   4178   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   4179   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   4180   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   4181   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
   4182   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
   4183   const uint32_t object_array_data_offset =
   4184       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
   4185 
   4186   bool is_type_check_slow_path_fatal = false;
   4187   // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
   4188   // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
   4189   // read barriers is done for performance and code size reasons.
   4190   if (!kEmitCompilerReadBarrier) {
   4191     is_type_check_slow_path_fatal =
   4192         (type_check_kind == TypeCheckKind::kExactCheck ||
   4193          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   4194          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   4195          type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
   4196         !instruction->CanThrowIntoCatchBlock();
   4197   }
   4198   SlowPathCodeARM64* type_check_slow_path =
   4199       new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
   4200                                                           is_type_check_slow_path_fatal);
   4201   codegen_->AddSlowPath(type_check_slow_path);
   4202 
   4203   vixl::aarch64::Label done;
   4204   // Avoid null check if we know obj is not null.
   4205   if (instruction->MustDoNullCheck()) {
   4206     __ Cbz(obj, &done);
   4207   }
   4208 
   4209   switch (type_check_kind) {
   4210     case TypeCheckKind::kExactCheck:
   4211     case TypeCheckKind::kArrayCheck: {
   4212       // /* HeapReference<Class> */ temp = obj->klass_
   4213       GenerateReferenceLoadTwoRegisters(instruction,
   4214                                         temp_loc,
   4215                                         obj_loc,
   4216                                         class_offset,
   4217                                         maybe_temp2_loc,
   4218                                         kWithoutReadBarrier);
   4219 
   4220       __ Cmp(temp, cls);
   4221       // Jump to slow path for throwing the exception or doing a
   4222       // more involved array check.
   4223       __ B(ne, type_check_slow_path->GetEntryLabel());
   4224       break;
   4225     }
   4226 
   4227     case TypeCheckKind::kAbstractClassCheck: {
   4228       // /* HeapReference<Class> */ temp = obj->klass_
   4229       GenerateReferenceLoadTwoRegisters(instruction,
   4230                                         temp_loc,
   4231                                         obj_loc,
   4232                                         class_offset,
   4233                                         maybe_temp2_loc,
   4234                                         kWithoutReadBarrier);
   4235 
   4236       // If the class is abstract, we eagerly fetch the super class of the
   4237       // object to avoid doing a comparison we know will fail.
   4238       vixl::aarch64::Label loop;
   4239       __ Bind(&loop);
   4240       // /* HeapReference<Class> */ temp = temp->super_class_
   4241       GenerateReferenceLoadOneRegister(instruction,
   4242                                        temp_loc,
   4243                                        super_offset,
   4244                                        maybe_temp2_loc,
   4245                                        kWithoutReadBarrier);
   4246 
   4247       // If the class reference currently in `temp` is null, jump to the slow path to throw the
   4248       // exception.
   4249       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
   4250       // Otherwise, compare classes.
   4251       __ Cmp(temp, cls);
   4252       __ B(ne, &loop);
   4253       break;
   4254     }
   4255 
   4256     case TypeCheckKind::kClassHierarchyCheck: {
   4257       // /* HeapReference<Class> */ temp = obj->klass_
   4258       GenerateReferenceLoadTwoRegisters(instruction,
   4259                                         temp_loc,
   4260                                         obj_loc,
   4261                                         class_offset,
   4262                                         maybe_temp2_loc,
   4263                                         kWithoutReadBarrier);
   4264 
   4265       // Walk over the class hierarchy to find a match.
   4266       vixl::aarch64::Label loop;
   4267       __ Bind(&loop);
   4268       __ Cmp(temp, cls);
   4269       __ B(eq, &done);
   4270 
   4271       // /* HeapReference<Class> */ temp = temp->super_class_
   4272       GenerateReferenceLoadOneRegister(instruction,
   4273                                        temp_loc,
   4274                                        super_offset,
   4275                                        maybe_temp2_loc,
   4276                                        kWithoutReadBarrier);
   4277 
   4278       // If the class reference currently in `temp` is not null, jump
   4279       // back at the beginning of the loop.
   4280       __ Cbnz(temp, &loop);
   4281       // Otherwise, jump to the slow path to throw the exception.
   4282       __ B(type_check_slow_path->GetEntryLabel());
   4283       break;
   4284     }
   4285 
   4286     case TypeCheckKind::kArrayObjectCheck: {
   4287       // /* HeapReference<Class> */ temp = obj->klass_
   4288       GenerateReferenceLoadTwoRegisters(instruction,
   4289                                         temp_loc,
   4290                                         obj_loc,
   4291                                         class_offset,
   4292                                         maybe_temp2_loc,
   4293                                         kWithoutReadBarrier);
   4294 
   4295       // Do an exact check.
   4296       __ Cmp(temp, cls);
   4297       __ B(eq, &done);
   4298 
   4299       // Otherwise, we need to check that the object's class is a non-primitive array.
   4300       // /* HeapReference<Class> */ temp = temp->component_type_
   4301       GenerateReferenceLoadOneRegister(instruction,
   4302                                        temp_loc,
   4303                                        component_offset,
   4304                                        maybe_temp2_loc,
   4305                                        kWithoutReadBarrier);
   4306 
   4307       // If the component type is null, jump to the slow path to throw the exception.
   4308       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
   4309       // Otherwise, the object is indeed an array. Further check that this component type is not a
   4310       // primitive type.
   4311       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
   4312       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   4313       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
   4314       break;
   4315     }
   4316 
   4317     case TypeCheckKind::kUnresolvedCheck:
   4318       // We always go into the type check slow path for the unresolved check cases.
   4319       //
   4320       // We cannot directly call the CheckCast runtime entry point
   4321       // without resorting to a type checking slow path here (i.e. by
   4322       // calling InvokeRuntime directly), as it would require to
   4323       // assign fixed registers for the inputs of this HInstanceOf
   4324       // instruction (following the runtime calling convention), which
   4325       // might be cluttered by the potential first read barrier
   4326       // emission at the beginning of this method.
   4327       __ B(type_check_slow_path->GetEntryLabel());
   4328       break;
   4329     case TypeCheckKind::kInterfaceCheck: {
   4330       // /* HeapReference<Class> */ temp = obj->klass_
   4331       GenerateReferenceLoadTwoRegisters(instruction,
   4332                                         temp_loc,
   4333                                         obj_loc,
   4334                                         class_offset,
   4335                                         maybe_temp2_loc,
   4336                                         kWithoutReadBarrier);
   4337 
   4338       // /* HeapReference<Class> */ temp = temp->iftable_
   4339       GenerateReferenceLoadTwoRegisters(instruction,
   4340                                         temp_loc,
   4341                                         temp_loc,
   4342                                         iftable_offset,
   4343                                         maybe_temp2_loc,
   4344                                         kWithoutReadBarrier);
   4345       // Iftable is never null.
   4346       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
   4347       // Loop through the iftable and check if any class matches.
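              // The if-table is a flat object array of {interface class, method array} pairs,
              // so the length loaded above is 2 * (number of interfaces); each iteration below
              // advances `temp` by two references and decrements the remaining count by 2.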
   4348       vixl::aarch64::Label start_loop;
   4349       __ Bind(&start_loop);
   4350       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
   4351       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
   4352       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
   4353       // Go to next interface.
   4354       __ Add(temp, temp, 2 * kHeapReferenceSize);
   4355       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
   4356       // Compare the classes and continue the loop if they do not match.
   4357       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
   4358       __ B(ne, &start_loop);
   4359       break;
   4360     }
   4361   }
   4362   __ Bind(&done);
   4363 
   4364   __ Bind(type_check_slow_path->GetExitLabel());
   4365 }
   4366 
   4367 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
   4368   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   4369   locations->SetOut(Location::ConstantLocation(constant));
   4370 }
   4371 
   4372 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   4373   // Will be generated at use site.
   4374 }
   4375 
   4376 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
   4377   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   4378   locations->SetOut(Location::ConstantLocation(constant));
   4379 }
   4380 
   4381 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   4382   // Will be generated at use site.
   4383 }
   4384 
   4385 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   4386   // The trampoline uses the same calling convention as dex calling conventions,
   4387   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
   4388   // the method_idx.
   4389   HandleInvoke(invoke);
   4390 }
   4391 
   4392 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   4393   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
   4394 }
   4395 
   4396 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
   4397   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
   4398   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
   4399 }
   4400 
   4401 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
   4402   HandleInvoke(invoke);
   4403 }
   4404 
   4405 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
   4406   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   4407   LocationSummary* locations = invoke->GetLocations();
   4408   Register temp = XRegisterFrom(locations->GetTemp(0));
   4409   Location receiver = locations->InAt(0);
   4410   Offset class_offset = mirror::Object::ClassOffset();
   4411   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
   4412 
   4413   // The register ip1 is required to be used for the hidden argument in
   4414   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
   4415   MacroAssembler* masm = GetVIXLAssembler();
   4416   UseScratchRegisterScope scratch_scope(masm);
   4417   scratch_scope.Exclude(ip1);
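          // The IMT has a fixed size, so several interface methods may share a slot; the
          // conflict trampoline uses the hidden argument in ip1 to pick the right target.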
   4418   __ Mov(ip1, invoke->GetDexMethodIndex());
   4419 
   4420   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   4421   if (receiver.IsStackSlot()) {
   4422     __ Ldr(temp.W(), StackOperandFrom(receiver));
   4423     {
   4424       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   4425       // /* HeapReference<Class> */ temp = temp->klass_
   4426       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
   4427       codegen_->MaybeRecordImplicitNullCheck(invoke);
   4428     }
   4429   } else {
   4430     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   4431     // /* HeapReference<Class> */ temp = receiver->klass_
   4432     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   4433     codegen_->MaybeRecordImplicitNullCheck(invoke);
   4434   }
   4435 
   4436   // Instead of simply (possibly) unpoisoning `temp` here, we should
   4437   // emit a read barrier for the previous class reference load.
   4438   // However this is not required in practice, as this is an
   4439   // intermediate/temporary reference and because the current
   4440   // concurrent copying collector keeps the from-space memory
   4441   // intact/accessible until the end of the marking phase (the
   4442   // concurrent copying collector may not do so in the future).
   4443   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   4444   __ Ldr(temp,
   4445       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
   4446   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   4447       invoke->GetImtIndex(), kArm64PointerSize));
   4448   // temp = temp->GetImtEntryAt(method_offset);
   4449   __ Ldr(temp, MemOperand(temp, method_offset));
   4450   // lr = temp->GetEntryPoint();
   4451   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
   4452 
   4453   {
   4454     // Ensure the pc position is recorded immediately after the `blr` instruction.
   4455     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
   4456 
   4457     // lr();
   4458     __ blr(lr);
   4459     DCHECK(!codegen_->IsLeafMethod());
   4460     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   4461   }
   4462 }
   4463 
   4464 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   4465   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   4466   if (intrinsic.TryDispatch(invoke)) {
   4467     return;
   4468   }
   4469 
   4470   HandleInvoke(invoke);
   4471 }
   4472 
   4473 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   4474   // Explicit clinit checks triggered by static invokes must have been pruned by
   4475   // art::PrepareForRegisterAllocation.
   4476   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   4477 
   4478   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   4479   if (intrinsic.TryDispatch(invoke)) {
   4480     return;
   4481   }
   4482 
   4483   HandleInvoke(invoke);
   4484 }
   4485 
   4486 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
   4487   if (invoke->GetLocations()->Intrinsified()) {
   4488     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
   4489     intrinsic.Dispatch(invoke);
   4490     return true;
   4491   }
   4492   return false;
   4493 }
   4494 
   4495 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
   4496       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
   4497       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
   4498   // On ARM64 we support all dispatch types.
   4499   return desired_dispatch_info;
   4500 }
   4501 
   4502 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
   4503     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   4504   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
   4505   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   4506   switch (invoke->GetMethodLoadKind()) {
   4507     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
   4508       uint32_t offset =
   4509           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
   4510       // temp = thread->string_init_entrypoint
   4511       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
   4512       break;
   4513     }
   4514     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
   4515       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
   4516       break;
   4517     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
   4518       DCHECK(GetCompilerOptions().IsBootImage());
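              // The ADRP/ADD pair forms a PC-relative address: ADRP materializes the 4KiB page
              // of the target and ADD supplies the low 12 bits; both immediates are placeholders
              // that are fixed up later through the recorded patches.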
   4519       // Add ADRP with its PC-relative method patch.
   4520       vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
   4521       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
   4522       // Add ADD with its PC-relative method patch.
   4523       vixl::aarch64::Label* add_label =
   4524           NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label);
   4525       EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
   4526       break;
   4527     }
   4528     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
   4529       // Load method address from literal pool.
   4530       __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
   4531       break;
   4532     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
   4533       // Add ADRP with its PC-relative .bss entry patch.
   4534       MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
   4535       vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
   4536       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
   4537       // Add LDR with its PC-relative .bss entry patch.
   4538       vixl::aarch64::Label* ldr_label =
   4539           NewMethodBssEntryPatch(target_method, adrp_label);
   4540       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
   4541       break;
   4542     }
   4543     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
   4544       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
   4545       return;  // No code pointer retrieval; the runtime performs the call directly.
   4546     }
   4547   }
   4548 
   4549   switch (invoke->GetCodePtrLocation()) {
   4550     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
   4551       {
   4552         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
   4553         ExactAssemblyScope eas(GetVIXLAssembler(),
   4554                                kInstructionSize,
   4555                                CodeBufferCheckScope::kExactSize);
   4556         __ bl(&frame_entry_label_);
   4557         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   4558       }
   4559       break;
   4560     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
   4561       // LR = callee_method->entry_point_from_quick_compiled_code_;
   4562       __ Ldr(lr, MemOperand(
   4563           XRegisterFrom(callee_method),
   4564           ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
   4565       {
   4566         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
   4567         ExactAssemblyScope eas(GetVIXLAssembler(),
   4568                                kInstructionSize,
   4569                                CodeBufferCheckScope::kExactSize);
   4570         // lr();
   4571         __ blr(lr);
   4572         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   4573       }
   4574       break;
   4575   }
   4576 
   4577   DCHECK(!IsLeafMethod());
   4578 }
   4579 
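// A hedged sketch of the dispatch sequence emitted by the function below,
// ignoring poisoning/read-barrier instrumentation (xT/wT stand for the temp):
//   ldr wT, [<receiver>, #class_offset]   ; receiver->klass_
//   ldr xT, [xT, #vtable_entry_offset]    ; ArtMethod* from the embedded vtable
//   ldr lr, [xT, #entry_point_offset]     ; quick code entry point
//   blr lr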
   4580 void CodeGeneratorARM64::GenerateVirtualCall(
   4581     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
   4582   // Use the calling convention instead of the location of the receiver, as
   4583   // intrinsics may have put the receiver in a different register. In the intrinsics
   4584   // slow path, the arguments have been moved to the right place, so here we are
   4585   // guaranteed that the receiver is the first register of the calling convention.
   4586   InvokeDexCallingConvention calling_convention;
   4587   Register receiver = calling_convention.GetRegisterAt(0);
   4588   Register temp = XRegisterFrom(temp_in);
   4589   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
   4590       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
   4591   Offset class_offset = mirror::Object::ClassOffset();
   4592   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
   4593 
   4594   DCHECK(receiver.IsRegister());
   4595 
   4596   {
   4597     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   4598     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   4599     // /* HeapReference<Class> */ temp = receiver->klass_
   4600     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
   4601     MaybeRecordImplicitNullCheck(invoke);
   4602   }
   4603   // Instead of simply (possibly) unpoisoning `temp` here, we should
   4604   // emit a read barrier for the previous class reference load.
   4605   // However, this is not required in practice: this is only an
   4606   // intermediate/temporary reference, and the current concurrent
   4607   // copying collector keeps the from-space memory intact/accessible
   4608   // until the end of the marking phase (this may change in the future).
   4609   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   4610   // temp = temp->GetMethodAt(method_offset);
   4611   __ Ldr(temp, MemOperand(temp, method_offset));
   4612   // lr = temp->GetEntryPoint();
   4613   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
   4614   {
   4615     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
   4616     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
   4617     // lr();
   4618     __ blr(lr);
   4619     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   4620   }
   4621 }
   4622 
   4623 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   4624   HandleInvoke(invoke);
   4625 }
   4626 
   4627 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   4628   codegen_->GenerateInvokePolymorphicCall(invoke);
   4629 }
   4630 
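// The patch helpers below are typically used in pairs (a usage sketch): the
// first call, with no adrp_label, creates the ADRP patch and returns its label;
// passing that label to a second call creates the dependent ADD/LDR patch:
//   vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(target_method);
//   vixl::aarch64::Label* add_label =
//       NewPcRelativeMethodPatch(target_method, adrp_label);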
   4631 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
   4632     MethodReference target_method,
   4633     vixl::aarch64::Label* adrp_label) {
   4634   return NewPcRelativePatch(*target_method.dex_file,
   4635                             target_method.dex_method_index,
   4636                             adrp_label,
   4637                             &pc_relative_method_patches_);
   4638 }
   4639 
   4640 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
   4641     MethodReference target_method,
   4642     vixl::aarch64::Label* adrp_label) {
   4643   return NewPcRelativePatch(*target_method.dex_file,
   4644                             target_method.dex_method_index,
   4645                             adrp_label,
   4646                             &method_bss_entry_patches_);
   4647 }
   4648 
   4649 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
   4650     const DexFile& dex_file,
   4651     dex::TypeIndex type_index,
   4652     vixl::aarch64::Label* adrp_label) {
   4653   return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_);
   4654 }
   4655 
   4656 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
   4657     const DexFile& dex_file,
   4658     dex::TypeIndex type_index,
   4659     vixl::aarch64::Label* adrp_label) {
   4660   return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
   4661 }
   4662 
   4663 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
   4664     const DexFile& dex_file,
   4665     dex::StringIndex string_index,
   4666     vixl::aarch64::Label* adrp_label) {
   4667   return
   4668       NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
   4669 }
   4670 
   4671 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
   4672   baker_read_barrier_patches_.emplace_back(custom_data);
   4673   return &baker_read_barrier_patches_.back().label;
   4674 }
   4675 
   4676 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
   4677     const DexFile& dex_file,
   4678     uint32_t offset_or_index,
   4679     vixl::aarch64::Label* adrp_label,
   4680     ArenaDeque<PcRelativePatchInfo>* patches) {
   4681   // Add a patch entry and return the label.
   4682   patches->emplace_back(dex_file, offset_or_index);
   4683   PcRelativePatchInfo* info = &patches->back();
   4684   vixl::aarch64::Label* label = &info->label;
   4685   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
   4686   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
   4687   return label;
   4688 }
   4689 
   4690 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
   4691     uint64_t address) {
   4692   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
   4693 }
   4694 
   4695 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
   4696     const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
   4697   jit_string_roots_.Overwrite(StringReference(&dex_file, string_index),
   4698                               reinterpret_cast64<uint64_t>(handle.GetReference()));
   4699   return jit_string_patches_.GetOrCreate(
   4700       StringReference(&dex_file, string_index),
   4701       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
   4702 }
   4703 
   4704 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
   4705     const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
   4706   jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index),
   4707                              reinterpret_cast64<uint64_t>(handle.GetReference()));
   4708   return jit_class_patches_.GetOrCreate(
   4709       TypeReference(&dex_file, type_index),
   4710       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
   4711 }
   4712 
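// The Emit*Placeholder helpers below bind the given fixup label to an
// instruction emitted with a zero immediate; the relative patcher rewrites that
// immediate at link time. A sketch of a PC-relative load built from them:
//   EmitAdrpPlaceholder(adrp_label, temp);            // adrp temp, #0
//   EmitLdrOffsetPlaceholder(ldr_label, temp, temp);  // ldr  temp, [temp, #0]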
   4713 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
   4714                                              vixl::aarch64::Register reg) {
   4715   DCHECK(reg.IsX());
   4716   SingleEmissionCheckScope guard(GetVIXLAssembler());
   4717   __ Bind(fixup_label);
   4718   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
   4719 }
   4720 
   4721 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
   4722                                             vixl::aarch64::Register out,
   4723                                             vixl::aarch64::Register base) {
   4724   DCHECK(out.IsX());
   4725   DCHECK(base.IsX());
   4726   SingleEmissionCheckScope guard(GetVIXLAssembler());
   4727   __ Bind(fixup_label);
   4728   __ add(out, base, Operand(/* offset placeholder */ 0));
   4729 }
   4730 
   4731 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
   4732                                                   vixl::aarch64::Register out,
   4733                                                   vixl::aarch64::Register base) {
   4734   DCHECK(base.IsX());
   4735   SingleEmissionCheckScope guard(GetVIXLAssembler());
   4736   __ Bind(fixup_label);
   4737   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
   4738 }
   4739 
   4740 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
   4741 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
   4742     const ArenaDeque<PcRelativePatchInfo>& infos,
   4743     ArenaVector<LinkerPatch>* linker_patches) {
   4744   for (const PcRelativePatchInfo& info : infos) {
   4745     linker_patches->push_back(Factory(info.label.GetLocation(),
   4746                                       &info.target_dex_file,
   4747                                       info.pc_insn_label->GetLocation(),
   4748                                       info.offset_or_index));
   4749   }
   4750 }
   4751 
   4752 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   4753   DCHECK(linker_patches->empty());
   4754   size_t size =
   4755       pc_relative_method_patches_.size() +
   4756       method_bss_entry_patches_.size() +
   4757       pc_relative_type_patches_.size() +
   4758       type_bss_entry_patches_.size() +
   4759       pc_relative_string_patches_.size() +
   4760       baker_read_barrier_patches_.size();
   4761   linker_patches->reserve(size);
   4762   if (GetCompilerOptions().IsBootImage()) {
   4763     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
   4764                                                                   linker_patches);
   4765     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
   4766                                                                 linker_patches);
   4767     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
   4768                                                                   linker_patches);
   4769   } else {
   4770     DCHECK(pc_relative_method_patches_.empty());
   4771     DCHECK(pc_relative_type_patches_.empty());
   4772     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
   4773                                                                   linker_patches);
   4774   }
   4775   EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
   4776                                                                 linker_patches);
   4777   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
   4778                                                               linker_patches);
   4779   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
   4780     linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
   4781                                                                        info.custom_data));
   4782   }
   4783   DCHECK_EQ(size, linker_patches->size());
   4784 }
   4785 
   4786 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
   4787   return uint32_literals_.GetOrCreate(
   4788       value,
   4789       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
   4790 }
   4791 
   4792 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
   4793   return uint64_literals_.GetOrCreate(
   4794       value,
   4795       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
   4796 }
   4797 
   4798 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   4799   // Explicit clinit checks triggered by static invokes must have been pruned by
   4800   // art::PrepareForRegisterAllocation.
   4801   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   4802 
   4803   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
   4804     return;
   4805   }
   4806 
   4807   // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
   4808   // are no pools emitted.
   4809   EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
   4810   LocationSummary* locations = invoke->GetLocations();
   4811   codegen_->GenerateStaticOrDirectCall(
   4812       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
   4813 }
   4814 
   4815 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   4816   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
   4817     return;
   4818   }
   4819 
   4820   // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
   4821   // are no pools emitted.
   4822   EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
   4823   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   4824   DCHECK(!codegen_->IsLeafMethod());
   4825 }
   4826 
   4827 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
   4828     HLoadClass::LoadKind desired_class_load_kind) {
   4829   switch (desired_class_load_kind) {
   4830     case HLoadClass::LoadKind::kInvalid:
   4831       LOG(FATAL) << "UNREACHABLE";
   4832       UNREACHABLE();
   4833     case HLoadClass::LoadKind::kReferrersClass:
   4834       break;
   4835     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
   4836     case HLoadClass::LoadKind::kBssEntry:
   4837       DCHECK(!Runtime::Current()->UseJitCompilation());
   4838       break;
   4839     case HLoadClass::LoadKind::kJitTableAddress:
   4840       DCHECK(Runtime::Current()->UseJitCompilation());
   4841       break;
   4842     case HLoadClass::LoadKind::kBootImageAddress:
   4843     case HLoadClass::LoadKind::kRuntimeCall:
   4844       break;
   4845   }
   4846   return desired_class_load_kind;
   4847 }
   4848 
   4849 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
   4850   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   4851   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
   4852     InvokeRuntimeCallingConvention calling_convention;
   4853     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
   4854         cls,
   4855         LocationFrom(calling_convention.GetRegisterAt(0)),
   4856         LocationFrom(vixl::aarch64::x0));
   4857     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
   4858     return;
   4859   }
   4860   DCHECK(!cls->NeedsAccessCheck());
   4861 
   4862   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   4863   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
   4864       ? LocationSummary::kCallOnSlowPath
   4865       : LocationSummary::kNoCall;
   4866   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
   4867   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
   4868     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   4869   }
   4870 
   4871   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
   4872     locations->SetInAt(0, Location::RequiresRegister());
   4873   }
   4874   locations->SetOut(Location::RequiresRegister());
   4875   if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
   4876     if (!kUseReadBarrier || kUseBakerReadBarrier) {
   4877       // Rely on the type resolution or initialization and marking to save everything we need.
   4878       locations->AddTemp(FixedTempLocation());
   4879       RegisterSet caller_saves = RegisterSet::Empty();
   4880       InvokeRuntimeCallingConvention calling_convention;
   4881       caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
   4882       DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
   4883                 RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
   4884                              Primitive::kPrimNot).GetCode());
   4885       locations->SetCustomSlowPathCallerSaves(caller_saves);
   4886     } else {
   4887       // For non-Baker read barrier we have a temp-clobbering call.
   4888     }
   4889   }
   4890 }
   4891 
   4892 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
   4893 // move.
   4894 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   4895   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   4896   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
   4897     codegen_->GenerateLoadClassRuntimeCall(cls);
   4898     return;
   4899   }
   4900   DCHECK(!cls->NeedsAccessCheck());
   4901 
   4902   Location out_loc = cls->GetLocations()->Out();
   4903   Register out = OutputRegister(cls);
   4904   Register bss_entry_temp;
   4905   vixl::aarch64::Label* bss_entry_adrp_label = nullptr;
   4906 
   4907   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
   4908       ? kWithoutReadBarrier
   4909       : kCompilerReadBarrierOption;
   4910   bool generate_null_check = false;
   4911   switch (load_kind) {
   4912     case HLoadClass::LoadKind::kReferrersClass: {
   4913       DCHECK(!cls->CanCallRuntime());
   4914       DCHECK(!cls->MustGenerateClinitCheck());
   4915       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
   4916       Register current_method = InputRegisterAt(cls, 0);
   4917       GenerateGcRootFieldLoad(cls,
   4918                               out_loc,
   4919                               current_method,
   4920                               ArtMethod::DeclaringClassOffset().Int32Value(),
   4921                               /* fixup_label */ nullptr,
   4922                               read_barrier_option);
   4923       break;
   4924     }
   4925     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
   4926       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
   4927       // Add ADRP with its PC-relative type patch.
   4928       const DexFile& dex_file = cls->GetDexFile();
   4929       dex::TypeIndex type_index = cls->GetTypeIndex();
   4930       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
   4931       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
   4932       // Add ADD with its PC-relative type patch.
   4933       vixl::aarch64::Label* add_label =
   4934           codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
   4935       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
   4936       break;
   4937     }
   4938     case HLoadClass::LoadKind::kBootImageAddress: {
   4939       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
   4940       uint32_t address = dchecked_integral_cast<uint32_t>(
   4941           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
   4942       DCHECK_NE(address, 0u);
   4943       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
   4944       break;
   4945     }
   4946     case HLoadClass::LoadKind::kBssEntry: {
   4947       // Add ADRP with its PC-relative Class .bss entry patch.
   4948       const DexFile& dex_file = cls->GetDexFile();
   4949       dex::TypeIndex type_index = cls->GetTypeIndex();
   4950       bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0));
   4951       bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
   4952       codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp);
   4953       // Add LDR with its PC-relative Class patch.
   4954       vixl::aarch64::Label* ldr_label =
   4955           codegen_->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label);
   4956       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
   4957       GenerateGcRootFieldLoad(cls,
   4958                               out_loc,
   4959                               bss_entry_temp,
   4960                               /* offset placeholder */ 0u,
   4961                               ldr_label,
   4962                               read_barrier_option);
   4963       generate_null_check = true;
   4964       break;
   4965     }
   4966     case HLoadClass::LoadKind::kJitTableAddress: {
   4967       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
   4968                                                        cls->GetTypeIndex(),
   4969                                                        cls->GetClass()));
   4970       GenerateGcRootFieldLoad(cls,
   4971                               out_loc,
   4972                               out.X(),
   4973                               /* offset */ 0,
   4974                               /* fixup_label */ nullptr,
   4975                               read_barrier_option);
   4976       break;
   4977     }
   4978     case HLoadClass::LoadKind::kRuntimeCall:
   4979     case HLoadClass::LoadKind::kInvalid:
   4980       LOG(FATAL) << "UNREACHABLE";
   4981       UNREACHABLE();
   4982   }
   4983 
   4984   bool do_clinit = cls->MustGenerateClinitCheck();
   4985   if (generate_null_check || do_clinit) {
   4986     DCHECK(cls->CanCallRuntime());
   4987     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
   4988         cls, cls, cls->GetDexPc(), do_clinit, bss_entry_temp, bss_entry_adrp_label);
   4989     codegen_->AddSlowPath(slow_path);
   4990     if (generate_null_check) {
   4991       __ Cbz(out, slow_path->GetEntryLabel());
   4992     }
   4993     if (cls->MustGenerateClinitCheck()) {
   4994       GenerateClassInitializationCheck(slow_path, out);
   4995     } else {
   4996       __ Bind(slow_path->GetExitLabel());
   4997     }
   4998   }
   4999 }
   5000 
   5001 static MemOperand GetExceptionTlsAddress() {
   5002   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
   5003 }
   5004 
   5005 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
   5006   LocationSummary* locations =
   5007       new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
   5008   locations->SetOut(Location::RequiresRegister());
   5009 }
   5010 
   5011 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
   5012   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
   5013 }
   5014 
   5015 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
   5016   new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
   5017 }
   5018 
   5019 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
   5020   __ Str(wzr, GetExceptionTlsAddress());
   5021 }
   5022 
   5023 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
   5024     HLoadString::LoadKind desired_string_load_kind) {
   5025   switch (desired_string_load_kind) {
   5026     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
   5027     case HLoadString::LoadKind::kBssEntry:
   5028       DCHECK(!Runtime::Current()->UseJitCompilation());
   5029       break;
   5030     case HLoadString::LoadKind::kJitTableAddress:
   5031       DCHECK(Runtime::Current()->UseJitCompilation());
   5032       break;
   5033     case HLoadString::LoadKind::kBootImageAddress:
   5034     case HLoadString::LoadKind::kRuntimeCall:
   5035       break;
   5036   }
   5037   return desired_string_load_kind;
   5038 }
   5039 
   5040 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
   5041   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   5042   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   5043   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
   5044     InvokeRuntimeCallingConvention calling_convention;
   5045     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
   5046   } else {
   5047     locations->SetOut(Location::RequiresRegister());
   5048     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
   5049       if (!kUseReadBarrier || kUseBakerReadBarrier) {
   5050         // Rely on the pResolveString and marking to save everything we need.
   5051         locations->AddTemp(FixedTempLocation());
   5052         RegisterSet caller_saves = RegisterSet::Empty();
   5053         InvokeRuntimeCallingConvention calling_convention;
   5054         caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
   5055         DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
   5056                   RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
   5057                                Primitive::kPrimNot).GetCode());
   5058         locations->SetCustomSlowPathCallerSaves(caller_saves);
   5059       } else {
   5060         // For non-Baker read barrier we have a temp-clobbering call.
   5061       }
   5062     }
   5063   }
   5064 }
   5065 
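// For the kBssEntry case handled below, the patched code is roughly (ignoring
// read-barrier instrumentation; xT is the fixed temp, wOut/xOut the output):
//   adrp xT, <String .bss entry>
//   ldr  wOut, [xT, #:lo12:<entry>]       ; GC root load
//   cbz  xOut, <LoadStringSlowPathARM64>  ; unresolved -> call pResolveString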
   5066 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
   5067 // move.
   5068 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
   5069   Register out = OutputRegister(load);
   5070   Location out_loc = load->GetLocations()->Out();
   5071 
   5072   switch (load->GetLoadKind()) {
   5073     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
   5074       // Add ADRP with its PC-relative String patch.
   5075       const DexFile& dex_file = load->GetDexFile();
   5076       const dex::StringIndex string_index = load->GetStringIndex();
   5077       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
   5078       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
   5079       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
   5080       // Add ADD with its PC-relative String patch.
   5081       vixl::aarch64::Label* add_label =
   5082           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
   5083       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
   5084       return;  // No dex cache slow path.
   5085     }
   5086     case HLoadString::LoadKind::kBootImageAddress: {
   5087       uint32_t address = dchecked_integral_cast<uint32_t>(
   5088           reinterpret_cast<uintptr_t>(load->GetString().Get()));
   5089       DCHECK_NE(address, 0u);
   5090       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
   5091       return;  // No dex cache slow path.
   5092     }
   5093     case HLoadString::LoadKind::kBssEntry: {
   5094       // Add ADRP with its PC-relative String .bss entry patch.
   5095       const DexFile& dex_file = load->GetDexFile();
   5096       const dex::StringIndex string_index = load->GetStringIndex();
   5097       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
   5098       Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
   5099       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
   5100       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
   5101       // Add LDR with its PC-relative String patch.
   5102       vixl::aarch64::Label* ldr_label =
   5103           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
   5104       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
   5105       GenerateGcRootFieldLoad(load,
   5106                               out_loc,
   5107                               temp,
   5108                               /* offset placeholder */ 0u,
   5109                               ldr_label,
   5110                               kCompilerReadBarrierOption);
   5111       SlowPathCodeARM64* slow_path =
   5112           new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
   5113       codegen_->AddSlowPath(slow_path);
   5114       __ Cbz(out.X(), slow_path->GetEntryLabel());
   5115       __ Bind(slow_path->GetExitLabel());
   5116       return;
   5117     }
   5118     case HLoadString::LoadKind::kJitTableAddress: {
   5119       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
   5120                                                         load->GetStringIndex(),
   5121                                                         load->GetString()));
   5122       GenerateGcRootFieldLoad(load,
   5123                               out_loc,
   5124                               out.X(),
   5125                               /* offset */ 0,
   5126                               /* fixup_label */ nullptr,
   5127                               kCompilerReadBarrierOption);
   5128       return;
   5129     }
   5130     default:
   5131       break;
   5132   }
   5133 
   5134   // TODO: Re-add the compiler code to do string dex cache lookup again.
   5135   InvokeRuntimeCallingConvention calling_convention;
   5136   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
   5137   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
   5138   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   5139   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
   5140 }
   5141 
   5142 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
   5143   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   5144   locations->SetOut(Location::ConstantLocation(constant));
   5145 }
   5146 
   5147 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
   5148   // Will be generated at use site.
   5149 }
   5150 
   5151 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
   5152   LocationSummary* locations =
   5153       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   5154   InvokeRuntimeCallingConvention calling_convention;
   5155   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   5156 }
   5157 
   5158 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
   5159   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
   5160                           instruction,
   5161                           instruction->GetDexPc());
   5162   if (instruction->IsEnter()) {
   5163     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
   5164   } else {
   5165     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   5166   }
   5167 }
   5168 
   5169 void LocationsBuilderARM64::VisitMul(HMul* mul) {
   5170   LocationSummary* locations =
   5171       new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
   5172   switch (mul->GetResultType()) {
   5173     case Primitive::kPrimInt:
   5174     case Primitive::kPrimLong:
   5175       locations->SetInAt(0, Location::RequiresRegister());
   5176       locations->SetInAt(1, Location::RequiresRegister());
   5177       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5178       break;
   5179 
   5180     case Primitive::kPrimFloat:
   5181     case Primitive::kPrimDouble:
   5182       locations->SetInAt(0, Location::RequiresFpuRegister());
   5183       locations->SetInAt(1, Location::RequiresFpuRegister());
   5184       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   5185       break;
   5186 
   5187     default:
   5188       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   5189   }
   5190 }
   5191 
   5192 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
   5193   switch (mul->GetResultType()) {
   5194     case Primitive::kPrimInt:
   5195     case Primitive::kPrimLong:
   5196       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
   5197       break;
   5198 
   5199     case Primitive::kPrimFloat:
   5200     case Primitive::kPrimDouble:
   5201       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
   5202       break;
   5203 
   5204     default:
   5205       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   5206   }
   5207 }
   5208 
   5209 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
   5210   LocationSummary* locations =
   5211       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
   5212   switch (neg->GetResultType()) {
   5213     case Primitive::kPrimInt:
   5214     case Primitive::kPrimLong:
   5215       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
   5216       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5217       break;
   5218 
   5219     case Primitive::kPrimFloat:
   5220     case Primitive::kPrimDouble:
   5221       locations->SetInAt(0, Location::RequiresFpuRegister());
   5222       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   5223       break;
   5224 
   5225     default:
   5226       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
   5227   }
   5228 }
   5229 
   5230 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
   5231   switch (neg->GetResultType()) {
   5232     case Primitive::kPrimInt:
   5233     case Primitive::kPrimLong:
   5234       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
   5235       break;
   5236 
   5237     case Primitive::kPrimFloat:
   5238     case Primitive::kPrimDouble:
   5239       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
   5240       break;
   5241 
   5242     default:
   5243       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
   5244   }
   5245 }
   5246 
   5247 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
   5248   LocationSummary* locations =
   5249       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   5250   InvokeRuntimeCallingConvention calling_convention;
   5251   locations->SetOut(LocationFrom(x0));
   5252   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   5253   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   5254 }
   5255 
   5256 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
   5257   // Note: if heap poisoning is enabled, the entry point takes care
   5258   // of poisoning the reference.
   5259   QuickEntrypointEnum entrypoint =
   5260       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
   5261   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   5262   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
   5263 }
   5264 
   5265 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
   5266   LocationSummary* locations =
   5267       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   5268   InvokeRuntimeCallingConvention calling_convention;
   5269   if (instruction->IsStringAlloc()) {
   5270     locations->AddTemp(LocationFrom(kArtMethodRegister));
   5271   } else {
   5272     locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   5273   }
   5274   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
   5275 }
   5276 
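// For the StringAlloc path below, the emitted call sequence is roughly (xT is
// the register backing the temp location):
//   ldr xT, [tr, #pNewEmptyString]   ; StringFactory ArtMethod* from the thread
//   ldr lr, [xT, #entry_point]       ; its quick code entry point
//   blr lr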
   5277 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
   5278   // Note: if heap poisoning is enabled, the entry point takes care
   5279   // of poisoning the reference.
   5280   if (instruction->IsStringAlloc()) {
   5281     // String is allocated through StringFactory. Call NewEmptyString entry point.
   5282     Location temp = instruction->GetLocations()->GetTemp(0);
   5283     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
   5284     __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
   5285     __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
   5286 
   5287     {
   5288       // Ensure the pc position is recorded immediately after the `blr` instruction.
   5289       ExactAssemblyScope eas(GetVIXLAssembler(),
   5290                              kInstructionSize,
   5291                              CodeBufferCheckScope::kExactSize);
   5292       __ blr(lr);
   5293       codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
   5294     }
   5295   } else {
   5296     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
   5297     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   5298   }
   5299 }
   5300 
   5301 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
   5302   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   5303   locations->SetInAt(0, Location::RequiresRegister());
   5304   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5305 }
   5306 
   5307 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
   5308   switch (instruction->GetResultType()) {
   5309     case Primitive::kPrimInt:
   5310     case Primitive::kPrimLong:
   5311       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
   5312       break;
   5313 
   5314     default:
   5315       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
   5316   }
   5317 }
   5318 
   5319 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
   5320   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   5321   locations->SetInAt(0, Location::RequiresRegister());
   5322   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5323 }
   5324 
   5325 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
   5326   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
   5327 }
   5328 
   5329 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
   5330   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   5331   locations->SetInAt(0, Location::RequiresRegister());
   5332 }
   5333 
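// Implicit null checks rely on the zero-offset load emitted below faulting when
// the object is null; the recorded pc lets the fault handler map the resulting
// SIGSEGV back to this instruction. The emitted code is just:
//   ldr wzr, [<obj>, #0]   ; value discarded, only the potential fault matters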
   5334 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
   5335   if (CanMoveNullCheckToUser(instruction)) {
   5336     return;
   5337   }
   5338   {
   5339     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   5340     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   5341     Location obj = instruction->GetLocations()->InAt(0);
   5342     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
   5343     RecordPcInfo(instruction, instruction->GetDexPc());
   5344   }
   5345 }
   5346 
   5347 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
   5348   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction);
   5349   AddSlowPath(slow_path);
   5350 
   5351   LocationSummary* locations = instruction->GetLocations();
   5352   Location obj = locations->InAt(0);
   5353 
   5354   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
   5355 }
   5356 
   5357 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
   5358   codegen_->GenerateNullCheck(instruction);
   5359 }
   5360 
   5361 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
   5362   HandleBinaryOp(instruction);
   5363 }
   5364 
   5365 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
   5366   HandleBinaryOp(instruction);
   5367 }
   5368 
   5369 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   5370   LOG(FATAL) << "Unreachable";
   5371 }
   5372 
   5373 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
   5374   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
   5375 }
   5376 
   5377 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
   5378   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   5379   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
   5380   if (location.IsStackSlot()) {
   5381     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   5382   } else if (location.IsDoubleStackSlot()) {
   5383     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   5384   }
   5385   locations->SetOut(location);
   5386 }
   5387 
   5388 void InstructionCodeGeneratorARM64::VisitParameterValue(
   5389     HParameterValue* instruction ATTRIBUTE_UNUSED) {
   5390   // Nothing to do, the parameter is already at its location.
   5391 }
   5392 
   5393 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
   5394   LocationSummary* locations =
   5395       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   5396   locations->SetOut(LocationFrom(kArtMethodRegister));
   5397 }
   5398 
   5399 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
   5400     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
   5401   // Nothing to do, the method is already at its location.
   5402 }
   5403 
   5404 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
   5405   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   5406   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
   5407     locations->SetInAt(i, Location::Any());
   5408   }
   5409   locations->SetOut(Location::Any());
   5410 }
   5411 
   5412 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   5413   LOG(FATAL) << "Unreachable";
   5414 }
   5415 
   5416 void LocationsBuilderARM64::VisitRem(HRem* rem) {
   5417   Primitive::Type type = rem->GetResultType();
   5418   LocationSummary::CallKind call_kind =
   5419       Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
   5420                                            : LocationSummary::kNoCall;
   5421   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
   5422 
   5423   switch (type) {
   5424     case Primitive::kPrimInt:
   5425     case Primitive::kPrimLong:
   5426       locations->SetInAt(0, Location::RequiresRegister());
   5427       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
   5428       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5429       break;
   5430 
   5431     case Primitive::kPrimFloat:
   5432     case Primitive::kPrimDouble: {
   5433       InvokeRuntimeCallingConvention calling_convention;
   5434       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
   5435       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
   5436       locations->SetOut(calling_convention.GetReturnLocation(type));
   5437 
   5438       break;
   5439     }
   5440 
   5441     default:
   5442       LOG(FATAL) << "Unexpected rem type " << type;
   5443   }
   5444 }
   5445 
   5446 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
   5447   Primitive::Type type = rem->GetResultType();
   5448 
   5449   switch (type) {
   5450     case Primitive::kPrimInt:
   5451     case Primitive::kPrimLong: {
   5452       GenerateDivRemIntegral(rem);
   5453       break;
   5454     }
   5455 
   5456     case Primitive::kPrimFloat:
   5457     case Primitive::kPrimDouble: {
   5458       QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod;
   5459       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
   5460       if (type == Primitive::kPrimFloat) {
   5461         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
   5462       } else {
   5463         CheckEntrypointTypes<kQuickFmod, double, double, double>();
   5464       }
   5465       break;
   5466     }
   5467 
   5468     default:
   5469       LOG(FATAL) << "Unexpected rem type " << type;
   5470       UNREACHABLE();
   5471   }
   5472 }
   5473 
   5474 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
   5475   constructor_fence->SetLocations(nullptr);
   5476 }
   5477 
   5478 void InstructionCodeGeneratorARM64::VisitConstructorFence(
   5479     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
   5480   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   5481 }
   5482 
   5483 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   5484   memory_barrier->SetLocations(nullptr);
   5485 }
   5486 
   5487 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   5488   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
   5489 }
   5490 
   5491 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
   5492   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   5493   Primitive::Type return_type = instruction->InputAt(0)->GetType();
   5494   locations->SetInAt(0, ARM64ReturnLocation(return_type));
   5495 }
   5496 
   5497 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
   5498   codegen_->GenerateFrameExit();
   5499 }
   5500 
   5501 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
   5502   instruction->SetLocations(nullptr);
   5503 }
   5504 
   5505 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
   5506   codegen_->GenerateFrameExit();
   5507 }
   5508 
   5509 void LocationsBuilderARM64::VisitRor(HRor* ror) {
   5510   HandleBinaryOp(ror);
   5511 }
   5512 
   5513 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
   5514   HandleBinaryOp(ror);
   5515 }
   5516 
   5517 void LocationsBuilderARM64::VisitShl(HShl* shl) {
   5518   HandleShift(shl);
   5519 }
   5520 
   5521 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
   5522   HandleShift(shl);
   5523 }
   5524 
   5525 void LocationsBuilderARM64::VisitShr(HShr* shr) {
   5526   HandleShift(shr);
   5527 }
   5528 
   5529 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
   5530   HandleShift(shr);
   5531 }
   5532 
   5533 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
   5534   HandleBinaryOp(instruction);
   5535 }
   5536 
   5537 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
   5538   HandleBinaryOp(instruction);
   5539 }
   5540 
   5541 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   5542   HandleFieldGet(instruction, instruction->GetFieldInfo());
   5543 }
   5544 
   5545 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   5546   HandleFieldGet(instruction, instruction->GetFieldInfo());
   5547 }
   5548 
   5549 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   5550   HandleFieldSet(instruction);
   5551 }
   5552 
   5553 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   5554   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   5555 }
   5556 
   5557 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
   5558     HUnresolvedInstanceFieldGet* instruction) {
   5559   FieldAccessCallingConventionARM64 calling_convention;
   5560   codegen_->CreateUnresolvedFieldLocationSummary(
   5561       instruction, instruction->GetFieldType(), calling_convention);
   5562 }
   5563 
   5564 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
   5565     HUnresolvedInstanceFieldGet* instruction) {
   5566   FieldAccessCallingConventionARM64 calling_convention;
   5567   codegen_->GenerateUnresolvedFieldAccess(instruction,
   5568                                           instruction->GetFieldType(),
   5569                                           instruction->GetFieldIndex(),
   5570                                           instruction->GetDexPc(),
   5571                                           calling_convention);
   5572 }
   5573 
   5574 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
   5575     HUnresolvedInstanceFieldSet* instruction) {
   5576   FieldAccessCallingConventionARM64 calling_convention;
   5577   codegen_->CreateUnresolvedFieldLocationSummary(
   5578       instruction, instruction->GetFieldType(), calling_convention);
   5579 }
   5580 
   5581 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
   5582     HUnresolvedInstanceFieldSet* instruction) {
   5583   FieldAccessCallingConventionARM64 calling_convention;
   5584   codegen_->GenerateUnresolvedFieldAccess(instruction,
   5585                                           instruction->GetFieldType(),
   5586                                           instruction->GetFieldIndex(),
   5587                                           instruction->GetDexPc(),
   5588                                           calling_convention);
   5589 }
   5590 
   5591 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
   5592     HUnresolvedStaticFieldGet* instruction) {
   5593   FieldAccessCallingConventionARM64 calling_convention;
   5594   codegen_->CreateUnresolvedFieldLocationSummary(
   5595       instruction, instruction->GetFieldType(), calling_convention);
   5596 }
   5597 
   5598 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
   5599     HUnresolvedStaticFieldGet* instruction) {
   5600   FieldAccessCallingConventionARM64 calling_convention;
   5601   codegen_->GenerateUnresolvedFieldAccess(instruction,
   5602                                           instruction->GetFieldType(),
   5603                                           instruction->GetFieldIndex(),
   5604                                           instruction->GetDexPc(),
   5605                                           calling_convention);
   5606 }
   5607 
   5608 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
   5609     HUnresolvedStaticFieldSet* instruction) {
   5610   FieldAccessCallingConventionARM64 calling_convention;
   5611   codegen_->CreateUnresolvedFieldLocationSummary(
   5612       instruction, instruction->GetFieldType(), calling_convention);
   5613 }
   5614 
   5615 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
   5616     HUnresolvedStaticFieldSet* instruction) {
   5617   FieldAccessCallingConventionARM64 calling_convention;
   5618   codegen_->GenerateUnresolvedFieldAccess(instruction,
   5619                                           instruction->GetFieldType(),
   5620                                           instruction->GetFieldIndex(),
   5621                                           instruction->GetDexPc(),
   5622                                           calling_convention);
   5623 }
   5624 
   5625 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5626   LocationSummary* locations =
   5627       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
   5628   // In the suspend check slow path, there are usually no caller-save registers at all.
   5629   // If SIMD instructions are present, however, we force spilling all live SIMD
   5630   // registers in full width (since the runtime only saves/restores lower part).
   5631   locations->SetCustomSlowPathCallerSaves(
   5632       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
   5633 }
   5634 
   5635 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5636   HBasicBlock* block = instruction->GetBlock();
   5637   if (block->GetLoopInformation() != nullptr) {
   5638     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
   5639     // The back edge will generate the suspend check.
   5640     return;
   5641   }
   5642   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
   5643     // The goto will generate the suspend check.
   5644     return;
   5645   }
   5646   GenerateSuspendCheck(instruction, nullptr);
   5647 }
   5648 
   5649 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
   5650   LocationSummary* locations =
   5651       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   5652   InvokeRuntimeCallingConvention calling_convention;
   5653   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   5654 }
   5655 
   5656 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
   5657   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
   5658   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
   5659 }
   5660 
   5661 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
   5662   LocationSummary* locations =
   5663       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   5664   Primitive::Type input_type = conversion->GetInputType();
   5665   Primitive::Type result_type = conversion->GetResultType();
   5666   DCHECK_NE(input_type, result_type);
   5667   if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
   5668       (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
   5669     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
   5670   }
   5671 
   5672   if (Primitive::IsFloatingPointType(input_type)) {
   5673     locations->SetInAt(0, Location::RequiresFpuRegister());
   5674   } else {
   5675     locations->SetInAt(0, Location::RequiresRegister());
   5676   }
   5677 
   5678   if (Primitive::IsFloatingPointType(result_type)) {
   5679     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   5680   } else {
   5681     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5682   }
   5683 }
   5684 
   5685 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
   5686   Primitive::Type result_type = conversion->GetResultType();
   5687   Primitive::Type input_type = conversion->GetInputType();
   5688 
   5689   DCHECK_NE(input_type, result_type);
   5690 
   5691   if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
   5692     int result_size = Primitive::ComponentSize(result_type);
   5693     int input_size = Primitive::ComponentSize(input_type);
   5694     int min_size = std::min(result_size, input_size);
   5695     Register output = OutputRegister(conversion);
   5696     Register source = InputRegisterAt(conversion, 0);
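             // Illustrative examples of the integral conversions handled below (assembly shown
             // for clarity): long->int is a plain 'mov w_out, w_in'; int->char is
             // 'ubfx w_out, w_in, #0, #16'; byte->int is 'sbfx w_out, w_in, #0, #8'.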
   5697     if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
   5698       // 'int' values are used directly as W registers, discarding the top
   5699       // bits, so we don't need to sign-extend and can just perform a move.
   5700       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
   5701       // top 32 bits of the target register. We theoretically could leave those
   5702       // bits unchanged, but we would have to make sure that no code uses a
    5703       // 32-bit input value as a 64-bit value assuming that the top 32 bits are
   5704       // zero.
   5705       __ Mov(output.W(), source.W());
   5706     } else if (result_type == Primitive::kPrimChar ||
   5707                (input_type == Primitive::kPrimChar && input_size < result_size)) {
   5708       __ Ubfx(output,
   5709               output.IsX() ? source.X() : source.W(),
   5710               0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
   5711     } else {
   5712       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
   5713     }
   5714   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
   5715     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
   5716   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
   5717     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
   5718     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
   5719   } else if (Primitive::IsFloatingPointType(result_type) &&
   5720              Primitive::IsFloatingPointType(input_type)) {
   5721     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
   5722   } else {
   5723     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
   5724                 << " to " << result_type;
   5725   }
   5726 }
   5727 
   5728 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
   5729   HandleShift(ushr);
   5730 }
   5731 
   5732 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
   5733   HandleShift(ushr);
   5734 }
   5735 
   5736 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
   5737   HandleBinaryOp(instruction);
   5738 }
   5739 
   5740 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
   5741   HandleBinaryOp(instruction);
   5742 }
   5743 
   5744 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   5745   // Nothing to do, this should be removed during prepare for register allocator.
   5746   LOG(FATAL) << "Unreachable";
   5747 }
   5748 
   5749 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   5750   // Nothing to do, this should be removed during prepare for register allocator.
   5751   LOG(FATAL) << "Unreachable";
   5752 }
   5753 
   5754 // Simple implementation of packed switch - generate cascaded compare/jumps.
   5755 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   5756   LocationSummary* locations =
   5757       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   5758   locations->SetInAt(0, Location::RequiresRegister());
   5759 }
   5760 
   5761 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   5762   int32_t lower_bound = switch_instr->GetStartValue();
   5763   uint32_t num_entries = switch_instr->GetNumEntries();
   5764   Register value_reg = InputRegisterAt(switch_instr, 0);
   5765   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   5766 
    5767   // Roughly set 16 as the maximum average number of instructions generated per HIR in a graph.
   5768   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
    5769   // ADR has a limited range (+/-1MB), so we set a threshold for the number of HIRs in the graph
    5770   // to make sure we don't emit the jump table if the target may be out of range.
   5771   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
   5772   // ranges and emit the tables only as required.
    5773   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
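           // Illustrative arithmetic: kInstructionSize is 4 bytes on ARM64, so the expected size
           // per HIR is 64 bytes and the threshold above evaluates to 1 MB / 64 B = 16384 HIRs.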
   5774 
   5775   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
   5776       // Current instruction id is an upper bound of the number of HIRs in the graph.
   5777       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
   5778     // Create a series of compare/jumps.
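             // Illustrative expansion (not emitted verbatim): for lower_bound L, num_entries == 4,
             // successors s0..s3 and default block d, the sequence below is roughly:
             //   subs temp, value, #L   // temp = value - L
             //   b.eq s0                // value == L
             //   subs temp, temp, #2
             //   b.lo s1                // value == L + 1
             //   b.eq s2                // value == L + 2
             //   cmp  temp, #1
             //   b.eq s3                // value == L + 3
             //   b    d                 // omitted if d is the next block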
   5779     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   5780     Register temp = temps.AcquireW();
   5781     __ Subs(temp, value_reg, Operand(lower_bound));
   5782 
   5783     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   5784     // Jump to successors[0] if value == lower_bound.
   5785     __ B(eq, codegen_->GetLabelOf(successors[0]));
   5786     int32_t last_index = 0;
   5787     for (; num_entries - last_index > 2; last_index += 2) {
   5788       __ Subs(temp, temp, Operand(2));
   5789       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
   5790       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
   5791       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
   5792       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
   5793     }
   5794     if (num_entries - last_index == 2) {
    5795       // Handle the last remaining case_value.
   5796       __ Cmp(temp, Operand(1));
   5797       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
   5798     }
   5799 
   5800     // And the default for any other value.
   5801     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
   5802       __ B(codegen_->GetLabelOf(default_block));
   5803     }
   5804   } else {
   5805     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
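             // The table (emitted at the end of the generated code) is expected to hold one 32-bit
             // entry per case, each containing the signed offset of the target block relative to
             // the table start; hence the UXTW #2 scaling on the load and the SXTW on the add below.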
   5806 
   5807     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   5808 
    5809     // The instructions below need at most one blocked register. Since there are two blocked
    5810     // registers, we are free to block one here.
   5811     Register temp_w = temps.AcquireW();
   5812     Register index;
   5813     // Remove the bias.
   5814     if (lower_bound != 0) {
   5815       index = temp_w;
   5816       __ Sub(index, value_reg, Operand(lower_bound));
   5817     } else {
   5818       index = value_reg;
   5819     }
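             // Illustrative example: for a switch over cases 100..103, index == value_reg - 100,
             // so a value of 102 selects entry 2 of the jump table.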
   5820 
    5821     // Jump to the default block if the index is out of range.
   5822     __ Cmp(index, Operand(num_entries));
   5823     __ B(hs, codegen_->GetLabelOf(default_block));
   5824 
    5825     // In the current VIXL implementation, encoding the immediate value for Adr does not
    5826     // require any blocked registers, so we are free to use both VIXL blocked registers to
    5827     // reduce register pressure.
   5828     Register table_base = temps.AcquireX();
   5829     // Load jump offset from the table.
   5830     __ Adr(table_base, jump_table->GetTableStartLabel());
   5831     Register jump_offset = temp_w;
   5832     __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
   5833 
    5834     // Jump to the target block by branching to table_base (PC-relative) + offset.
   5835     Register target_address = table_base;
   5836     __ Add(target_address, table_base, Operand(jump_offset, SXTW));
   5837     __ Br(target_address);
   5838   }
   5839 }
   5840 
   5841 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
   5842     HInstruction* instruction,
   5843     Location out,
   5844     uint32_t offset,
   5845     Location maybe_temp,
   5846     ReadBarrierOption read_barrier_option) {
   5847   Primitive::Type type = Primitive::kPrimNot;
   5848   Register out_reg = RegisterFrom(out, type);
   5849   if (read_barrier_option == kWithReadBarrier) {
   5850     CHECK(kEmitCompilerReadBarrier);
   5851     if (kUseBakerReadBarrier) {
   5852       // Load with fast path based Baker's read barrier.
   5853       // /* HeapReference<Object> */ out = *(out + offset)
   5854       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
   5855                                                       out,
   5856                                                       out_reg,
   5857                                                       offset,
   5858                                                       maybe_temp,
   5859                                                       /* needs_null_check */ false,
   5860                                                       /* use_load_acquire */ false);
   5861     } else {
   5862       // Load with slow path based read barrier.
   5863       // Save the value of `out` into `maybe_temp` before overwriting it
   5864       // in the following move operation, as we will need it for the
   5865       // read barrier below.
   5866       Register temp_reg = RegisterFrom(maybe_temp, type);
   5867       __ Mov(temp_reg, out_reg);
   5868       // /* HeapReference<Object> */ out = *(out + offset)
   5869       __ Ldr(out_reg, HeapOperand(out_reg, offset));
   5870       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
   5871     }
   5872   } else {
   5873     // Plain load with no read barrier.
   5874     // /* HeapReference<Object> */ out = *(out + offset)
   5875     __ Ldr(out_reg, HeapOperand(out_reg, offset));
   5876     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
   5877   }
   5878 }
   5879 
   5880 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
   5881     HInstruction* instruction,
   5882     Location out,
   5883     Location obj,
   5884     uint32_t offset,
   5885     Location maybe_temp,
   5886     ReadBarrierOption read_barrier_option) {
   5887   Primitive::Type type = Primitive::kPrimNot;
   5888   Register out_reg = RegisterFrom(out, type);
   5889   Register obj_reg = RegisterFrom(obj, type);
   5890   if (read_barrier_option == kWithReadBarrier) {
   5891     CHECK(kEmitCompilerReadBarrier);
   5892     if (kUseBakerReadBarrier) {
   5893       // Load with fast path based Baker's read barrier.
   5894       // /* HeapReference<Object> */ out = *(obj + offset)
   5895       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
   5896                                                       out,
   5897                                                       obj_reg,
   5898                                                       offset,
   5899                                                       maybe_temp,
   5900                                                       /* needs_null_check */ false,
   5901                                                       /* use_load_acquire */ false);
   5902     } else {
   5903       // Load with slow path based read barrier.
   5904       // /* HeapReference<Object> */ out = *(obj + offset)
   5905       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
   5906       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
   5907     }
   5908   } else {
   5909     // Plain load with no read barrier.
   5910     // /* HeapReference<Object> */ out = *(obj + offset)
   5911     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
   5912     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
   5913   }
   5914 }
   5915 
   5916 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
   5917     HInstruction* instruction,
   5918     Location root,
   5919     Register obj,
   5920     uint32_t offset,
   5921     vixl::aarch64::Label* fixup_label,
   5922     ReadBarrierOption read_barrier_option) {
   5923   DCHECK(fixup_label == nullptr || offset == 0u);
   5924   Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
   5925   if (read_barrier_option == kWithReadBarrier) {
   5926     DCHECK(kEmitCompilerReadBarrier);
   5927     if (kUseBakerReadBarrier) {
   5928       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
    5929       // Baker's read barriers are used.
   5930       if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
   5931           !Runtime::Current()->UseJitCompilation()) {
   5932         // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
   5933         // the Marking Register) to decide whether we need to enter
   5934         // the slow path to mark the GC root.
   5935         //
   5936         // We use link-time generated thunks for the slow path. That thunk
   5937         // checks the reference and jumps to the entrypoint if needed.
   5938         //
   5939         //     lr = &return_address;
   5940         //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
   5941         //     if (mr) {  // Thread::Current()->GetIsGcMarking()
   5942         //       goto gc_root_thunk<root_reg>(lr)
   5943         //     }
   5944         //   return_address:
   5945 
   5946         UseScratchRegisterScope temps(GetVIXLAssembler());
   5947         DCHECK(temps.IsAvailable(ip0));
   5948         DCHECK(temps.IsAvailable(ip1));
   5949         temps.Exclude(ip0, ip1);
   5950         uint32_t custom_data =
   5951             linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
   5952         vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
   5953 
   5954         EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
   5955         vixl::aarch64::Label return_address;
   5956         __ adr(lr, &return_address);
   5957         if (fixup_label != nullptr) {
   5958           __ Bind(fixup_label);
   5959         }
   5960         static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
    5961                       "GC root LDR must be 2 instructions (8B) before the return address label.");
   5962         __ ldr(root_reg, MemOperand(obj.X(), offset));
   5963         __ Bind(cbnz_label);
   5964         __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
   5965         __ Bind(&return_address);
   5966       } else {
   5967         // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
   5968         // the Marking Register) to decide whether we need to enter
   5969         // the slow path to mark the GC root.
   5970         //
   5971         //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
   5972         //   if (mr) {  // Thread::Current()->GetIsGcMarking()
   5973         //     // Slow path.
   5974         //     entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
   5975         //     root = entrypoint(root);  // root = ReadBarrier::Mark(root);  // Entry point call.
   5976         //   }
   5977 
   5978         // Slow path marking the GC root `root`. The entrypoint will
   5979         // be loaded by the slow path code.
   5980         SlowPathCodeARM64* slow_path =
   5981             new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
   5982         codegen_->AddSlowPath(slow_path);
   5983 
   5984         // /* GcRoot<mirror::Object> */ root = *(obj + offset)
   5985         if (fixup_label == nullptr) {
   5986           __ Ldr(root_reg, MemOperand(obj, offset));
   5987         } else {
   5988           codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
   5989         }
   5990         static_assert(
   5991             sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
   5992             "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
   5993             "have different sizes.");
   5994         static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
   5995                       "art::mirror::CompressedReference<mirror::Object> and int32_t "
   5996                       "have different sizes.");
   5997 
   5998         __ Cbnz(mr, slow_path->GetEntryLabel());
   5999         __ Bind(slow_path->GetExitLabel());
   6000       }
   6001     } else {
   6002       // GC root loaded through a slow path for read barriers other
   6003       // than Baker's.
   6004       // /* GcRoot<mirror::Object>* */ root = obj + offset
   6005       if (fixup_label == nullptr) {
   6006         __ Add(root_reg.X(), obj.X(), offset);
   6007       } else {
   6008         codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
   6009       }
   6010       // /* mirror::Object* */ root = root->Read()
   6011       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
   6012     }
   6013   } else {
   6014     // Plain GC root load with no read barrier.
   6015     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
   6016     if (fixup_label == nullptr) {
   6017       __ Ldr(root_reg, MemOperand(obj, offset));
   6018     } else {
   6019       codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
   6020     }
   6021     // Note that GC roots are not affected by heap poisoning, thus we
   6022     // do not have to unpoison `root_reg` here.
   6023   }
   6024 }
   6025 
   6026 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
   6027                                                                Location ref,
   6028                                                                Register obj,
   6029                                                                uint32_t offset,
   6030                                                                Location maybe_temp,
   6031                                                                bool needs_null_check,
   6032                                                                bool use_load_acquire) {
   6033   DCHECK(kEmitCompilerReadBarrier);
   6034   DCHECK(kUseBakerReadBarrier);
   6035 
   6036   if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
   6037       !use_load_acquire &&
   6038       !Runtime::Current()->UseJitCompilation()) {
   6039     // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
   6040     // Marking Register) to decide whether we need to enter the slow
   6041     // path to mark the reference. Then, in the slow path, check the
   6042     // gray bit in the lock word of the reference's holder (`obj`) to
   6043     // decide whether to mark `ref` or not.
   6044     //
   6045     // We use link-time generated thunks for the slow path. That thunk checks
   6046     // the holder and jumps to the entrypoint if needed. If the holder is not
   6047     // gray, it creates a fake dependency and returns to the LDR instruction.
   6048     //
   6049     //     lr = &gray_return_address;
   6050     //     if (mr) {  // Thread::Current()->GetIsGcMarking()
   6051     //       goto field_thunk<holder_reg, base_reg>(lr)
   6052     //     }
   6053     //   not_gray_return_address:
   6054     //     // Original reference load. If the offset is too large to fit
   6055     //     // into LDR, we use an adjusted base register here.
   6056     //     HeapReference<mirror::Object> reference = *(obj+offset);
   6057     //   gray_return_address:
   6058 
   6059     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
   6060     Register base = obj;
   6061     if (offset >= kReferenceLoadMinFarOffset) {
   6062       DCHECK(maybe_temp.IsRegister());
   6063       base = WRegisterFrom(maybe_temp);
   6064       static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
   6065       __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
   6066       offset &= (kReferenceLoadMinFarOffset - 1u);
   6067     }
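             // Illustrative split with a hypothetical threshold of 0x1000 (the actual value of
             // kReferenceLoadMinFarOffset is defined elsewhere): offset 0x1234 would become
             // base = obj + 0x1000 and offset = 0x234, keeping the LDR immediate small.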
   6068     UseScratchRegisterScope temps(GetVIXLAssembler());
   6069     DCHECK(temps.IsAvailable(ip0));
   6070     DCHECK(temps.IsAvailable(ip1));
   6071     temps.Exclude(ip0, ip1);
   6072     uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
   6073         base.GetCode(),
   6074         obj.GetCode());
   6075     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
   6076 
   6077     EmissionCheckScope guard(GetVIXLAssembler(),
   6078                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
   6079     vixl::aarch64::Label return_address;
   6080     __ adr(lr, &return_address);
   6081     __ Bind(cbnz_label);
   6082     __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
   6083     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
   6084                   "Field LDR must be 1 instruction (4B) before the return address label; "
    6085                   "2 instructions (8B) for heap poisoning.");
   6086     Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
   6087     __ ldr(ref_reg, MemOperand(base.X(), offset));
   6088     if (needs_null_check) {
   6089       MaybeRecordImplicitNullCheck(instruction);
   6090     }
   6091     GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
   6092     __ Bind(&return_address);
   6093     return;
   6094   }
   6095 
   6096   // /* HeapReference<Object> */ ref = *(obj + offset)
   6097   Register temp = WRegisterFrom(maybe_temp);
   6098   Location no_index = Location::NoLocation();
   6099   size_t no_scale_factor = 0u;
   6100   GenerateReferenceLoadWithBakerReadBarrier(instruction,
   6101                                             ref,
   6102                                             obj,
   6103                                             offset,
   6104                                             no_index,
   6105                                             no_scale_factor,
   6106                                             temp,
   6107                                             needs_null_check,
   6108                                             use_load_acquire);
   6109 }
   6110 
   6111 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
   6112                                                                Location ref,
   6113                                                                Register obj,
   6114                                                                uint32_t data_offset,
   6115                                                                Location index,
   6116                                                                Register temp,
   6117                                                                bool needs_null_check) {
   6118   DCHECK(kEmitCompilerReadBarrier);
   6119   DCHECK(kUseBakerReadBarrier);
   6120 
   6121   static_assert(
   6122       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   6123       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   6124   size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
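           // For reference arrays this shift is 2 (4-byte compressed references, per the assert
           // above), so element i lives at data_offset + (i << 2) from the array base.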
   6125 
   6126   if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
   6127       !Runtime::Current()->UseJitCompilation()) {
   6128     // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
   6129     // Marking Register) to decide whether we need to enter the slow
   6130     // path to mark the reference. Then, in the slow path, check the
   6131     // gray bit in the lock word of the reference's holder (`obj`) to
   6132     // decide whether to mark `ref` or not.
   6133     //
   6134     // We use link-time generated thunks for the slow path. That thunk checks
   6135     // the holder and jumps to the entrypoint if needed. If the holder is not
   6136     // gray, it creates a fake dependency and returns to the LDR instruction.
   6137     //
   6138     //     lr = &gray_return_address;
   6139     //     if (mr) {  // Thread::Current()->GetIsGcMarking()
   6140     //       goto array_thunk<base_reg>(lr)
   6141     //     }
   6142     //   not_gray_return_address:
   6143     //     // Original reference load. If the offset is too large to fit
   6144     //     // into LDR, we use an adjusted base register here.
   6145     //     HeapReference<mirror::Object> reference = data[index];
   6146     //   gray_return_address:
   6147 
   6148     DCHECK(index.IsValid());
   6149     Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
   6150     Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
   6151 
   6152     UseScratchRegisterScope temps(GetVIXLAssembler());
   6153     DCHECK(temps.IsAvailable(ip0));
   6154     DCHECK(temps.IsAvailable(ip1));
   6155     temps.Exclude(ip0, ip1);
   6156     uint32_t custom_data =
   6157         linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
   6158     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
   6159 
   6160     __ Add(temp.X(), obj.X(), Operand(data_offset));
   6161     EmissionCheckScope guard(GetVIXLAssembler(),
   6162                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
   6163     vixl::aarch64::Label return_address;
   6164     __ adr(lr, &return_address);
   6165     __ Bind(cbnz_label);
   6166     __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
   6167     static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
   6168                   "Array LDR must be 1 instruction (4B) before the return address label; "
    6169                   "2 instructions (8B) for heap poisoning.");
   6170     __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
   6171     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
   6172     GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
   6173     __ Bind(&return_address);
   6174     return;
   6175   }
   6176 
   6177   // Array cells are never volatile variables, therefore array loads
   6178   // never use Load-Acquire instructions on ARM64.
   6179   const bool use_load_acquire = false;
   6180 
   6181   // /* HeapReference<Object> */ ref =
   6182   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   6183   GenerateReferenceLoadWithBakerReadBarrier(instruction,
   6184                                             ref,
   6185                                             obj,
   6186                                             data_offset,
   6187                                             index,
   6188                                             scale_factor,
   6189                                             temp,
   6190                                             needs_null_check,
   6191                                             use_load_acquire);
   6192 }
   6193 
   6194 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
   6195                                                                    Location ref,
   6196                                                                    Register obj,
   6197                                                                    uint32_t offset,
   6198                                                                    Location index,
   6199                                                                    size_t scale_factor,
   6200                                                                    Register temp,
   6201                                                                    bool needs_null_check,
   6202                                                                    bool use_load_acquire) {
   6203   DCHECK(kEmitCompilerReadBarrier);
   6204   DCHECK(kUseBakerReadBarrier);
   6205   // If we are emitting an array load, we should not be using a
   6206   // Load Acquire instruction.  In other words:
   6207   // `instruction->IsArrayGet()` => `!use_load_acquire`.
   6208   DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
   6209 
   6210   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
   6211   // Marking Register) to decide whether we need to enter the slow
   6212   // path to mark the reference. Then, in the slow path, check the
   6213   // gray bit in the lock word of the reference's holder (`obj`) to
   6214   // decide whether to mark `ref` or not.
   6215   //
   6216   //   if (mr) {  // Thread::Current()->GetIsGcMarking()
   6217   //     // Slow path.
   6218   //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   6219   //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   6220   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   6221   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   6222   //     if (is_gray) {
   6223   //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
   6224   //       ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
   6225   //     }
   6226   //   } else {
   6227   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   6228   //   }
   6229 
   6230   // Slow path marking the object `ref` when the GC is marking. The
   6231   // entrypoint will be loaded by the slow path code.
   6232   SlowPathCodeARM64* slow_path =
   6233       new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
   6234           instruction,
   6235           ref,
   6236           obj,
   6237           offset,
   6238           index,
   6239           scale_factor,
   6240           needs_null_check,
   6241           use_load_acquire,
   6242           temp);
   6243   AddSlowPath(slow_path);
   6244 
   6245   __ Cbnz(mr, slow_path->GetEntryLabel());
   6246   // Fast path: the GC is not marking: just load the reference.
   6247   GenerateRawReferenceLoad(
   6248       instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
   6249   __ Bind(slow_path->GetExitLabel());
   6250 }
   6251 
   6252 void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
   6253                                                                   Location ref,
   6254                                                                   Register obj,
   6255                                                                   Location field_offset,
   6256                                                                   Register temp,
   6257                                                                   bool needs_null_check,
   6258                                                                   bool use_load_acquire) {
   6259   DCHECK(kEmitCompilerReadBarrier);
   6260   DCHECK(kUseBakerReadBarrier);
   6261   // If we are emitting an array load, we should not be using a
   6262   // Load Acquire instruction.  In other words:
   6263   // `instruction->IsArrayGet()` => `!use_load_acquire`.
   6264   DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
   6265 
   6266   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
   6267   // Marking Register) to decide whether we need to enter the slow
   6268   // path to update the reference field within `obj`. Then, in the
   6269   // slow path, check the gray bit in the lock word of the reference's
   6270   // holder (`obj`) to decide whether to mark `ref` and update the
   6271   // field or not.
   6272   //
   6273   //   if (mr) {  // Thread::Current()->GetIsGcMarking()
   6274   //     // Slow path.
   6275   //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   6276   //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   6277   //     HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
   6278   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   6279   //     if (is_gray) {
   6280   //       old_ref = ref;
   6281   //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
   6282   //       ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
   6283   //       compareAndSwapObject(obj, field_offset, old_ref, ref);
   6284   //     }
   6285   //   }
   6286 
   6287   // Slow path updating the object reference at address `obj + field_offset`
   6288   // when the GC is marking. The entrypoint will be loaded by the slow path code.
   6289   SlowPathCodeARM64* slow_path =
   6290       new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
   6291           instruction,
   6292           ref,
   6293           obj,
   6294           /* offset */ 0u,
   6295           /* index */ field_offset,
   6296           /* scale_factor */ 0u /* "times 1" */,
   6297           needs_null_check,
   6298           use_load_acquire,
   6299           temp);
   6300   AddSlowPath(slow_path);
   6301 
   6302   __ Cbnz(mr, slow_path->GetEntryLabel());
   6303   // Fast path: the GC is not marking: nothing to do (the field is
   6304   // up-to-date, and we don't need to load the reference).
   6305   __ Bind(slow_path->GetExitLabel());
   6306 }
   6307 
   6308 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
   6309                                                   Location ref,
   6310                                                   Register obj,
   6311                                                   uint32_t offset,
   6312                                                   Location index,
   6313                                                   size_t scale_factor,
   6314                                                   bool needs_null_check,
   6315                                                   bool use_load_acquire) {
   6316   DCHECK(obj.IsW());
   6317   Primitive::Type type = Primitive::kPrimNot;
   6318   Register ref_reg = RegisterFrom(ref, type);
   6319 
   6320   // If needed, vixl::EmissionCheckScope guards are used to ensure
   6321   // that no pools are emitted between the load (macro) instruction
   6322   // and MaybeRecordImplicitNullCheck.
   6323 
   6324   if (index.IsValid()) {
   6325     // Load types involving an "index": ArrayGet,
   6326     // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
   6327     // intrinsics.
   6328     if (use_load_acquire) {
   6329       // UnsafeGetObjectVolatile intrinsic case.
   6330       // Register `index` is not an index in an object array, but an
   6331       // offset to an object reference field within object `obj`.
   6332       DCHECK(instruction->IsInvoke()) << instruction->DebugName();
   6333       DCHECK(instruction->GetLocations()->Intrinsified());
   6334       DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
   6335           << instruction->AsInvoke()->GetIntrinsic();
   6336       DCHECK_EQ(offset, 0u);
   6337       DCHECK_EQ(scale_factor, 0u);
   6338       DCHECK_EQ(needs_null_check, false);
   6339       // /* HeapReference<mirror::Object> */ ref = *(obj + index)
   6340       MemOperand field = HeapOperand(obj, XRegisterFrom(index));
   6341       LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
   6342     } else {
   6343       // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
   6344       // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
   6345       if (index.IsConstant()) {
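                 // Illustrative arithmetic: for a reference array (scale_factor == 2), offset == 12
                 // and a constant index of 5 give computed_offset = 12 + (5 << 2) = 32.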
   6346         uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
   6347         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   6348         Load(type, ref_reg, HeapOperand(obj, computed_offset));
   6349         if (needs_null_check) {
   6350           MaybeRecordImplicitNullCheck(instruction);
   6351         }
   6352       } else {
   6353         UseScratchRegisterScope temps(GetVIXLAssembler());
   6354         Register temp = temps.AcquireW();
   6355         __ Add(temp, obj, offset);
   6356         {
   6357           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   6358           Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
   6359           if (needs_null_check) {
   6360             MaybeRecordImplicitNullCheck(instruction);
   6361           }
   6362         }
   6363       }
   6364     }
   6365   } else {
   6366     // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
   6367     MemOperand field = HeapOperand(obj, offset);
   6368     if (use_load_acquire) {
   6369       // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
   6370       LoadAcquire(instruction, ref_reg, field, needs_null_check);
   6371     } else {
   6372       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   6373       Load(type, ref_reg, field);
   6374       if (needs_null_check) {
   6375         MaybeRecordImplicitNullCheck(instruction);
   6376       }
   6377     }
   6378   }
   6379 
   6380   // Object* ref = ref_addr->AsMirrorPtr()
   6381   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
   6382 }
   6383 
   6384 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
   6385                                                  Location out,
   6386                                                  Location ref,
   6387                                                  Location obj,
   6388                                                  uint32_t offset,
   6389                                                  Location index) {
   6390   DCHECK(kEmitCompilerReadBarrier);
   6391 
   6392   // Insert a slow path based read barrier *after* the reference load.
   6393   //
   6394   // If heap poisoning is enabled, the unpoisoning of the loaded
   6395   // reference will be carried out by the runtime within the slow
   6396   // path.
   6397   //
   6398   // Note that `ref` currently does not get unpoisoned (when heap
   6399   // poisoning is enabled), which is alright as the `ref` argument is
   6400   // not used by the artReadBarrierSlow entry point.
   6401   //
   6402   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
   6403   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
   6404       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
   6405   AddSlowPath(slow_path);
   6406 
   6407   __ B(slow_path->GetEntryLabel());
   6408   __ Bind(slow_path->GetExitLabel());
   6409 }
   6410 
   6411 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
   6412                                                       Location out,
   6413                                                       Location ref,
   6414                                                       Location obj,
   6415                                                       uint32_t offset,
   6416                                                       Location index) {
   6417   if (kEmitCompilerReadBarrier) {
   6418     // Baker's read barriers shall be handled by the fast path
   6419     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
   6420     DCHECK(!kUseBakerReadBarrier);
   6421     // If heap poisoning is enabled, unpoisoning will be taken care of
   6422     // by the runtime within the slow path.
   6423     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   6424   } else if (kPoisonHeapReferences) {
   6425     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
   6426   }
   6427 }
   6428 
   6429 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
   6430                                                         Location out,
   6431                                                         Location root) {
   6432   DCHECK(kEmitCompilerReadBarrier);
   6433 
   6434   // Insert a slow path based read barrier *after* the GC root load.
   6435   //
   6436   // Note that GC roots are not affected by heap poisoning, so we do
   6437   // not need to do anything special for this here.
   6438   SlowPathCodeARM64* slow_path =
   6439       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
   6440   AddSlowPath(slow_path);
   6441 
   6442   __ B(slow_path->GetEntryLabel());
   6443   __ Bind(slow_path->GetExitLabel());
   6444 }
   6445 
   6446 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
   6447   LocationSummary* locations =
   6448       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   6449   locations->SetInAt(0, Location::RequiresRegister());
   6450   locations->SetOut(Location::RequiresRegister());
   6451 }
   6452 
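         // HClassTableGet loads an ArtMethod*: the vtable case is a single load from the class's
         // embedded vtable entry, while the IMT case first loads the ImTable pointer from the class
         // and then loads the entry from that table.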
   6453 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
   6454   LocationSummary* locations = instruction->GetLocations();
   6455   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
   6456     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
   6457         instruction->GetIndex(), kArm64PointerSize).SizeValue();
   6458     __ Ldr(XRegisterFrom(locations->Out()),
   6459            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
   6460   } else {
   6461     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   6462         instruction->GetIndex(), kArm64PointerSize));
   6463     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
   6464         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
   6465     __ Ldr(XRegisterFrom(locations->Out()),
   6466            MemOperand(XRegisterFrom(locations->Out()), method_offset));
   6467   }
   6468 }
   6469 
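         // Patches a 32-bit literal in the JIT-compiled code so that it holds the address of the
         // root's slot in the JIT root table. Illustrative arithmetic: index_in_table == 3 yields
         // roots_data + 3 * sizeof(GcRoot<mirror::Object>), i.e. roots_data + 12 with 4-byte roots.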
   6470 static void PatchJitRootUse(uint8_t* code,
   6471                             const uint8_t* roots_data,
   6472                             vixl::aarch64::Literal<uint32_t>* literal,
   6473                             uint64_t index_in_table) {
   6474   uint32_t literal_offset = literal->GetOffset();
   6475   uintptr_t address =
   6476       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
   6477   uint8_t* data = code + literal_offset;
   6478   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
   6479 }
   6480 
   6481 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   6482   for (const auto& entry : jit_string_patches_) {
   6483     const StringReference& string_reference = entry.first;
   6484     vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
   6485     const auto it = jit_string_roots_.find(string_reference);
   6486     DCHECK(it != jit_string_roots_.end());
   6487     uint64_t index_in_table = it->second;
   6488     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   6489   }
   6490   for (const auto& entry : jit_class_patches_) {
   6491     const TypeReference& type_reference = entry.first;
   6492     vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
   6493     const auto it = jit_class_roots_.find(type_reference);
   6494     DCHECK(it != jit_class_roots_.end());
   6495     uint64_t index_in_table = it->second;
   6496     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   6497   }
   6498 }
   6499 
   6500 #undef __
   6501 #undef QUICK_ENTRY_POINT
   6502 
   6503 }  // namespace arm64
   6504 }  // namespace art
   6505