/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/arm64/relative_patcher_arm64.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using vixl::EmissionCheckScope;

#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif

namespace art {

template<class MirrorType>
class GcRoot;

namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterAt;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

static constexpr int kCurrentMethodStackOffset = 0;
// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
// jump table version generates 7 instructions and num_entries literals. The compare/jump
// sequence generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
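// For example, at num_entries == 7 the two are roughly break-even: the compare/jump
// sequence costs about 1.5 * 7 + 3 ~= 14 instructions, while the jump table costs
// 7 instructions plus 7 table literals (14 words in total).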

// A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
// offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
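// (The 16KiB limit comes from the LDR immediate encoding: a 12-bit unsigned offset
// scaled by the 4-byte access size covers byte offsets 0..16380.)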

// Flags controlling the use of link-time generated thunks for Baker read barriers.
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;

// Some instructions have special requirements for a temporary, for example
// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
// a temp that's not R0 (to avoid an extra move) and Baker read barrier field
// loads with large offsets need a fixed register to limit the number of link-time
// thunks we generate. For these and similar cases, we want to reserve a specific
// register that's neither callee-save nor an argument register. We choose x15.
inline Location FixedTempLocation() {
  return Location::RegisterLocation(x15.GetCode());
}

inline Condition ARM64Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}
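// Note that kCondB/kCondBE/kCondA/kCondAE are the unsigned comparisons; they map to
// the carry-flag based AArch64 conditions lo/ls/hi/hs.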

inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
  // The ARM64 condition codes can express all the necessary branches, see the
  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}
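// For example, an unordered FCMP result sets NZCV to 0011, so `lt` (N != V) would be
// taken on NaN input while `cc` (C == 0) would not; this is why kCondLT maps to `cc`
// when gt_bias requires the unordered case to fall through.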

Location ARM64ReturnLocation(Primitive::Type return_type) {
  // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
  // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
  // but we use the exact registers for clarity.
  if (return_type == Primitive::kPrimFloat) {
    return LocationFrom(s0);
  } else if (return_type == Primitive::kPrimDouble) {
    return LocationFrom(d0);
  } else if (return_type == Primitive::kPrimLong) {
    return LocationFrom(x0);
  } else if (return_type == Primitive::kPrimVoid) {
    return Location::NoLocation();
  } else {
    return LocationFrom(w0);
  }
}
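// For example, ARM64ReturnLocation(Primitive::kPrimInt) yields LocationFrom(w0),
// matching the AAPCS64 convention of returning integral values in x0/w0 and
// floating-point values in v0 (d0/s0).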

Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) {
  return ARM64ReturnLocation(return_type);
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()

// Calculate the memory operands for saving/restoring live registers.
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset,
                                           bool is_save) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
                                         codegen->GetNumberOfCoreRegisters(),
                                         fp_spills,
                                         codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);

  MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
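  // An LDP/STP of X registers encodes a signed 7-bit immediate scaled by the access
  // size, i.e. pair offsets in [-512, 504] bytes; if the spill area lies beyond that
  // range, rebase onto a temporary register below.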
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate
    // register to compute the base address (the base of the floating-point register spills).
    Register new_base = temps.AcquireSameSizeAs(base);
    __ Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
    __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
    __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}

void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kXRegSizeInBytes;
  }

  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kDRegSizeInBytes;
  }

  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
}

void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
}

class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
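    // For example, if the index currently sits in the register that must receive the
    // length (and vice versa), independent moves would clobber one of the values; the
    // resolver emits a swap (or routes through a temp) instead.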
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
        locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
};

class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
};

class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  LoadClassSlowPathARM64(HLoadClass* cls,
                         HInstruction* at,
                         uint32_t dex_pc,
                         bool do_clinit,
                         vixl::aarch64::Register bss_entry_temp = vixl::aarch64::Register(),
                         vixl::aarch64::Label* bss_entry_adrp_label = nullptr)
      : SlowPathCodeARM64(at),
        cls_(cls),
        dex_pc_(dex_pc),
        do_clinit_(do_clinit),
        bss_entry_temp_(bss_entry_temp),
        bss_entry_adrp_label_(bss_entry_adrp_label) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    InvokeRuntimeCallingConvention calling_convention;
    // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp
    // register; make sure it's not clobbered by the call or by saving/restoring registers.
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
    bool is_load_class_bss_entry =
        (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
    if (is_load_class_bss_entry) {
      DCHECK(bss_entry_temp_.IsValid());
      DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0)));
      DCHECK(
          !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_));
    }

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    dex::TypeIndex type_index = cls_->GetTypeIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
    QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                : kQuickInitializeType;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      Primitive::Type type = instruction_->GetType();
      arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
    }
    RestoreLiveRegisters(codegen, locations);
    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
    if (is_load_class_bss_entry) {
      DCHECK(out.IsValid());
      const DexFile& dex_file = cls_->GetDexFile();
      if (call_saves_everything_except_r0_ip0) {
        // The class entry page address was preserved in bss_entry_temp_ thanks to kSaveEverything.
      } else {
        // For non-Baker read barrier, we need to re-calculate the address of the class entry page.
        bss_entry_adrp_label_ = arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
        arm64_codegen->EmitAdrpPlaceholder(bss_entry_adrp_label_, bss_entry_temp_);
      }
      vixl::aarch64::Label* strp_label =
          arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label_);
      {
        SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
        __ Bind(strp_label);
        __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
               MemOperand(bss_entry_temp_, /* offset placeholder */ 0));
      }
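      // At link time the adrp bound at `bss_entry_adrp_label_` is patched with the page
      // of the type's .bss entry and the str above with its low 12 bits, so no absolute
      // address is needed here.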
    }
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The dex PC of `at`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  // For HLoadClass/kBssEntry, the temp register and the label of the ADRP where it was loaded.
  vixl::aarch64::Register bss_entry_temp_;
  vixl::aarch64::Label* bss_entry_adrp_label_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
};

class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
      : SlowPathCodeARM64(instruction),
        temp_(temp),
        adrp_label_(adrp_label) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    InvokeRuntimeCallingConvention calling_convention;
    // Make sure `temp_` is not clobbered by the call or by saving/restoring registers.
    DCHECK(temp_.IsValid());
    DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0)));
    DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_));

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
    arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    Primitive::Type type = instruction_->GetType();
    arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    // Store the resolved String to the BSS entry.
    const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
    if (!kUseReadBarrier || kUseBakerReadBarrier) {
      // The string entry page address was preserved in temp_ thanks to kSaveEverything.
    } else {
      // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
      adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
      arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
    }
    vixl::aarch64::Label* strp_label =
        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
    {
      SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
      __ Bind(strp_label);
      __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
             MemOperand(temp_, /* offset placeholder */ 0));
    }

    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }

 private:
  const Register temp_;
  vixl::aarch64::Label* adrp_label_;

  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
};

class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
};

class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARM64(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
    arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm64_codegen->GetLabelOf(successor_));
    }
  }

  vixl::aarch64::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl::aarch64::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
};

class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();

    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    uint32_t dex_pc = instruction_->GetDexPc();

    __ Bind(GetEntryLabel());

    if (!is_fatal_) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               Primitive::kPrimNot,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               Primitive::kPrimNot);
    if (instruction_->IsInstanceOf()) {
      arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      Primitive::Type ret_type = instruction_->GetType();
      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
      arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};

class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};

class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        Primitive::kPrimNot,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
};

void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
  uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  // We are about to use the assembler to place literals directly. Make sure we have enough
  // space in the underlying code buffer and that the jump table is generated with the right size.
  EmissionCheckScope scope(codegen->GetVIXLAssembler(),
                           num_entries * sizeof(int32_t),
                           CodeBufferCheckScope::kExactSize);

  __ Bind(&table_start_);
  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
  for (uint32_t i = 0; i < num_entries; i++) {
    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
    DCHECK(target_label->IsBound());
    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
    Literal<int32_t> literal(jump_offset);
    __ place(&literal);
  }
}
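// A rough sketch of how the dispatch code consumes this table (register names are
// illustrative): materialize the table base with Adr, load the 32-bit entry for the
// switch index, add it to the base (entries are offsets relative to table_start_),
// and do an indirect branch:
//
//   Adr  x_base, table_start_
//   Ldr  w_off, [x_base, w_index, UXTW #2]
//   Add  x_base, x_base, Operand(w_off, SXTW)
//   Br   x_base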

// Abstract base class for read barrier slow paths marking a reference
// `ref`.
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked.
class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
 protected:
  ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
      : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }

  // Generate assembly code calling the read barrier marking runtime
  // entry point (ReadBarrierMarkRegX).
  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    DCHECK_NE(ref_.reg(), LR);
    DCHECK_NE(ref_.reg(), WSP);
    DCHECK_NE(ref_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary, it cannot be the entry point's input/output.
    DCHECK_NE(ref_.reg(), IP0);
    DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in W0):
    //
    //   W0 <- ref
    //   W0 <- ReadBarrierMark(W0)
    //   ref <- W0
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    if (entrypoint_.IsValid()) {
      arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
      __ Blr(XRegisterFrom(entrypoint_));
    } else {
      // The entrypoint is not already loaded; load it from the thread.
      int32_t entry_point_offset =
          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
      // This runtime call does not require a stack map.
      arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    }
  }

  // The location (register) of the marked object reference.
  const Location ref_;

  // The location of the entrypoint if it is already loaded.
  const Location entrypoint_;

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
// is when the decision to mark is based on whether the GC is marking.
class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
                               Location ref,
                               Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);
    __ B(GetExitLabel());
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). The field `obj.field` in the object `obj` holding
// this reference does not get updated by this slow path after marking
// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
// below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked.
class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 uint32_t offset,
                                                 Location index,
                                                 size_t scale_factor,
                                                 bool needs_null_check,
                                                 bool use_load_acquire,
                                                 Register temp,
                                                 Location entrypoint)
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        use_load_acquire_(use_load_acquire),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(obj_.IsW());
    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP0 nor IP1, as we may use them to emit the reference
    // load (in the call to GenerateRawReferenceLoad below), and we
    // need the lock word to still be in `temp_` after the reference
    // load.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    DCHECK_NE(LocationFrom(temp_).reg(), IP1);

    __ Bind(GetEntryLabel());

    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
    // inserted after the original load. However, in fast path based
    // Baker's read barriers, we need to perform the load of
    // mirror::Object::monitor_ *before* the original reference load.
    // This load-load ordering is required by the read barrier.
    // The fast path/slow path (for Baker's algorithm) should look like:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //   }
    //
    // Note: the original implementation in ReadBarrier::Barrier is
    // slightly more complex as it performs additional checks that we do
    // not do here for performance reasons.

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including rb_state,
    // to prevent load-load reordering, and without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp`.
    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
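    // (The W-sized monitor load above zero-extends into the X register, so
    // `temp_.X() >> 32` is always 0 and the Add leaves `obj` unchanged while
    // still creating the address dependency.)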

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->GenerateRawReferenceLoad(instruction_,
                                            ref_,
                                            obj_,
                                            offset_,
                                            index_,
                                            scale_factor_,
                                            /* needs_null_check */ false,
                                            use_load_acquire_);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);

    __ B(GetExitLabel());
  }

 private:
  // The register containing the object holding the marked object reference field.
  Register obj_;
  // The offset, index and scale factor to access the reference in `obj_`.
  uint32_t offset_;
  Location index_;
  size_t scale_factor_;
  // Is a null check required?
  bool needs_null_check_;
  // Should this reference load use Load-Acquire semantics?
  bool use_load_acquire_;
  // A temporary register used to hold the lock word of `obj_`.
  Register temp_;

  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). If needed, this slow path also atomically updates
// the field `obj.field` in the object `obj` holding this reference
// after marking (contrary to
// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked.
class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
    : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t offset,
                                                               Location index,
                                                               size_t scale_factor,
                                                               bool needs_null_check,
                                                               bool use_load_acquire,
                                                               Register temp,
                                                               Location entrypoint)
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        use_load_acquire_(use_load_acquire),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = WRegisterFrom(ref_);
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(obj_.IsW());
    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());

    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
    DCHECK_EQ(offset_, 0u);
    DCHECK_EQ(scale_factor_, 0u);
    DCHECK_EQ(use_load_acquire_, false);
    // The location of the offset of the marked reference field within `obj_`.
    Location field_offset = index_;
    DCHECK(field_offset.IsRegister()) << field_offset;

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP0 nor IP1, as we may use them to emit the reference
    // load (in the call to GenerateRawReferenceLoad below), and we
    // need the lock word to still be in `temp_` after the reference
    // load.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    DCHECK_NE(LocationFrom(temp_).reg(), IP1);

    __ Bind(GetEntryLabel());

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including rb_state,
    // to prevent load-load reordering, and without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp`.
    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->GenerateRawReferenceLoad(instruction_,
                                            ref_,
                                            obj_,
                                            offset_,
                                            index_,
                                            scale_factor_,
                                            /* needs_null_check */ false,
                                            use_load_acquire_);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());

    // Save the old value of the reference before marking it.
    // Note that we cannot use IP to save the old reference, as IP is
    // used internally by the ReadBarrierMarkRegX entry point, and we
    // need the old reference after the call to that entry point.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    __ Mov(temp_.W(), ref_reg);

    GenerateReadBarrierMarkRuntimeCall(codegen);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*(obj_ + field_offset)`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LDXR/CMP/BNE sequence of instructions in the compare-and-set
    // (CAS) operation below would abort the CAS, leaving the field
    // as-is.
    __ Cmp(temp_.W(), ref_reg);
    __ B(eq, GetExitLabel());

    // Update the holder's field atomically. This may fail if another thread
    // updates it before us, but that's OK. This is achieved using a strong
    // compare-and-set (CAS) operation with relaxed memory synchronization
    // ordering, where the expected value is the old reference and the
    // desired value is the new reference.

    MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
    UseScratchRegisterScope temps(masm);

    // Convenience aliases.
    Register base = obj_.W();
    Register offset = XRegisterFrom(field_offset);
    Register expected = temp_.W();
    Register value = ref_reg;
    Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
    Register tmp_value = temps.AcquireW();  // Value in memory.

    __ Add(tmp_ptr, base.X(), Operand(offset));

    if (kPoisonHeapReferences) {
      arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
      if (value.Is(expected)) {
        // Do not poison `value`, as it is the same register as
        // `expected`, which has just been poisoned.
      } else {
        arm64_codegen->GetAssembler()->PoisonHeapReference(value);
      }
    }

    // do {
    //   tmp_value = [tmp_ptr] - expected;
    // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));

    vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
    __ Bind(&loop_head);
    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
    __ Cmp(tmp_value, expected);
    __ B(&comparison_failed, ne);
    __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
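    // Stxr writes 0 to `tmp_value` if the exclusive store succeeded and 1 if it
    // lost the reservation; retry the loop in the latter case.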
   1107     __ Cbnz(tmp_value, &loop_head);
   1108     __ B(&exit_loop);
   1109     __ Bind(&comparison_failed);
   1110     __ Clrex();
   1111     __ Bind(&exit_loop);
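             // For reference, the LDXR/STXR loop above implements the moral
             // equivalent of the following C++ sketch (hypothetical; the real
             // code uses raw exclusives so that a failed comparison can take
             // the CLREX path and fall through instead of retrying):
             //
             //   std::atomic<int32_t>* addr = ...;      // tmp_ptr
             //   int32_t old_ref = expected;            // may be updated on failure
             //   addr->compare_exchange_strong(old_ref, value,
             //                                 std::memory_order_relaxed);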
   1112 
   1113     if (kPoisonHeapReferences) {
   1114       arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
   1115       if (value.Is(expected)) {
   1116         // Do not unpoison `value`, as it is the same register as
   1117         // `expected`, which has just been unpoisoned.
   1118       } else {
   1119         arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
   1120       }
   1121     }
   1122 
   1123     __ B(GetExitLabel());
   1124   }
   1125 
   1126  private:
   1127   // The register containing the object holding the marked object reference field.
   1128   const Register obj_;
   1129   // The offset, index and scale factor to access the reference in `obj_`.
   1130   uint32_t offset_;
   1131   Location index_;
   1132   size_t scale_factor_;
   1133   // Is a null check required?
   1134   bool needs_null_check_;
   1135   // Should this reference load use Load-Acquire semantics?
   1136   bool use_load_acquire_;
    1137   // A temporary register used to hold the lock word of `obj_`, and
    1138   // also to hold the original reference value when the reference is
    1139   // marked.
   1140   const Register temp_;
   1141 
   1142   DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
   1143 };
   1144 
   1145 // Slow path generating a read barrier for a heap reference.
   1146 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
   1147  public:
   1148   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
   1149                                            Location out,
   1150                                            Location ref,
   1151                                            Location obj,
   1152                                            uint32_t offset,
   1153                                            Location index)
   1154       : SlowPathCodeARM64(instruction),
   1155         out_(out),
   1156         ref_(ref),
   1157         obj_(obj),
   1158         offset_(offset),
   1159         index_(index) {
   1160     DCHECK(kEmitCompilerReadBarrier);
   1161     // If `obj` is equal to `out` or `ref`, it means the initial object
   1162     // has been overwritten by (or after) the heap object reference load
   1163     // to be instrumented, e.g.:
   1164     //
    1165     //   __ Ldr(out, HeapOperand(out, class_offset));
   1166     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
   1167     //
   1168     // In that case, we have lost the information about the original
   1169     // object, and the emitted read barrier cannot work properly.
   1170     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
   1171     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
   1172   }
   1173 
   1174   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
   1175     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
   1176     LocationSummary* locations = instruction_->GetLocations();
   1177     Primitive::Type type = Primitive::kPrimNot;
   1178     DCHECK(locations->CanCall());
   1179     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
   1180     DCHECK(instruction_->IsInstanceFieldGet() ||
   1181            instruction_->IsStaticFieldGet() ||
   1182            instruction_->IsArrayGet() ||
   1183            instruction_->IsInstanceOf() ||
   1184            instruction_->IsCheckCast() ||
   1185            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
   1186         << "Unexpected instruction in read barrier for heap reference slow path: "
   1187         << instruction_->DebugName();
   1188     // The read barrier instrumentation of object ArrayGet
   1189     // instructions does not support the HIntermediateAddress
   1190     // instruction.
   1191     DCHECK(!(instruction_->IsArrayGet() &&
   1192              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
   1193 
   1194     __ Bind(GetEntryLabel());
   1195 
   1196     SaveLiveRegisters(codegen, locations);
   1197 
   1198     // We may have to change the index's value, but as `index_` is a
   1199     // constant member (like other "inputs" of this slow path),
    1200     // we introduce a copy of it, `index`.
   1201     Location index = index_;
   1202     if (index_.IsValid()) {
   1203       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
   1204       if (instruction_->IsArrayGet()) {
   1205         // Compute the actual memory offset and store it in `index`.
   1206         Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
   1207         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
   1208         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
   1209           // We are about to change the value of `index_reg` (see the
   1210           // calls to vixl::MacroAssembler::Lsl and
   1211           // vixl::MacroAssembler::Mov below), but it has
   1212           // not been saved by the previous call to
   1213           // art::SlowPathCode::SaveLiveRegisters, as it is a
   1214           // callee-save register --
   1215           // art::SlowPathCode::SaveLiveRegisters does not consider
   1216           // callee-save registers, as it has been designed with the
   1217           // assumption that callee-save registers are supposed to be
   1218           // handled by the called function.  So, as a callee-save
   1219           // register, `index_reg` _would_ eventually be saved onto
   1220           // the stack, but it would be too late: we would have
   1221           // changed its value earlier.  Therefore, we manually save
   1222           // it here into another freely available register,
   1223           // `free_reg`, chosen of course among the caller-save
   1224           // registers (as a callee-save `free_reg` register would
   1225           // exhibit the same problem).
   1226           //
   1227           // Note we could have requested a temporary register from
   1228           // the register allocator instead; but we prefer not to, as
   1229           // this is a slow path, and we know we can find a
   1230           // caller-save register that is available.
   1231           Register free_reg = FindAvailableCallerSaveRegister(codegen);
   1232           __ Mov(free_reg.W(), index_reg);
   1233           index_reg = free_reg;
   1234           index = LocationFrom(index_reg);
   1235         } else {
   1236           // The initial register stored in `index_` has already been
   1237           // saved in the call to art::SlowPathCode::SaveLiveRegisters
   1238           // (as it is not a callee-save register), so we can freely
   1239           // use it.
   1240         }
   1241         // Shifting the index value contained in `index_reg` by the scale
   1242         // factor (2) cannot overflow in practice, as the runtime is
   1243         // unable to allocate object arrays with a size larger than
   1244         // 2^26 - 1 (that is, 2^28 - 4 bytes).
   1245         __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
   1246         static_assert(
   1247             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   1248             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   1249         __ Add(index_reg, index_reg, Operand(offset_));
   1250       } else {
   1251         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
   1252         // intrinsics, `index_` is not shifted by a scale factor of 2
   1253         // (as in the case of ArrayGet), as it is actually an offset
   1254         // to an object field within an object.
   1255         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
   1256         DCHECK(instruction_->GetLocations()->Intrinsified());
   1257         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
   1258                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
   1259             << instruction_->AsInvoke()->GetIntrinsic();
   1260         DCHECK_EQ(offset_, 0u);
   1261         DCHECK(index_.IsRegister());
   1262       }
   1263     }
   1264 
   1265     // We're moving two or three locations to locations that could
   1266     // overlap, so we need a parallel move resolver.
   1267     InvokeRuntimeCallingConvention calling_convention;
   1268     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
   1269     parallel_move.AddMove(ref_,
   1270                           LocationFrom(calling_convention.GetRegisterAt(0)),
   1271                           type,
   1272                           nullptr);
   1273     parallel_move.AddMove(obj_,
   1274                           LocationFrom(calling_convention.GetRegisterAt(1)),
   1275                           type,
   1276                           nullptr);
   1277     if (index.IsValid()) {
   1278       parallel_move.AddMove(index,
   1279                             LocationFrom(calling_convention.GetRegisterAt(2)),
   1280                             Primitive::kPrimInt,
   1281                             nullptr);
   1282       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
   1283     } else {
   1284       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
   1285       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
   1286     }
   1287     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
   1288                                  instruction_,
   1289                                  instruction_->GetDexPc(),
   1290                                  this);
   1291     CheckEntrypointTypes<
   1292         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
   1293     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
   1294 
   1295     RestoreLiveRegisters(codegen, locations);
   1296 
   1297     __ B(GetExitLabel());
   1298   }
   1299 
   1300   const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
   1301 
   1302  private:
   1303   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    1304     size_t ref = static_cast<size_t>(XRegisterFrom(ref_).GetCode());
    1305     size_t obj = static_cast<size_t>(XRegisterFrom(obj_).GetCode());
   1306     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
   1307       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
   1308         return Register(VIXLRegCodeFromART(i), kXRegSize);
   1309       }
   1310     }
   1311     // We shall never fail to find a free caller-save register, as
   1312     // there are more than two core caller-save registers on ARM64
   1313     // (meaning it is possible to find one which is different from
   1314     // `ref` and `obj`).
   1315     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
   1316     LOG(FATAL) << "Could not find a free register";
   1317     UNREACHABLE();
   1318   }
   1319 
   1320   const Location out_;
   1321   const Location ref_;
   1322   const Location obj_;
   1323   const uint32_t offset_;
   1324   // An additional location containing an index to an array.
   1325   // Only used for HArrayGet and the UnsafeGetObject &
   1326   // UnsafeGetObjectVolatile intrinsics.
   1327   const Location index_;
   1328 
   1329   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
   1330 };
   1331 
   1332 // Slow path generating a read barrier for a GC root.
   1333 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
   1334  public:
   1335   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
   1336       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
   1337     DCHECK(kEmitCompilerReadBarrier);
   1338   }
   1339 
   1340   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
   1341     LocationSummary* locations = instruction_->GetLocations();
   1342     Primitive::Type type = Primitive::kPrimNot;
   1343     DCHECK(locations->CanCall());
   1344     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
   1345     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
   1346         << "Unexpected instruction in read barrier for GC root slow path: "
   1347         << instruction_->DebugName();
   1348 
   1349     __ Bind(GetEntryLabel());
   1350     SaveLiveRegisters(codegen, locations);
   1351 
   1352     InvokeRuntimeCallingConvention calling_convention;
   1353     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    1354     // The argument of ReadBarrierForRootSlow is not a managed
   1355     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
   1356     // thus we need a 64-bit move here, and we cannot use
   1357     //
   1358     //   arm64_codegen->MoveLocation(
   1359     //       LocationFrom(calling_convention.GetRegisterAt(0)),
   1360     //       root_,
   1361     //       type);
   1362     //
   1363     // which would emit a 32-bit move, as `type` is a (32-bit wide)
   1364     // reference type (`Primitive::kPrimNot`).
   1365     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
   1366     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
   1367                                  instruction_,
   1368                                  instruction_->GetDexPc(),
   1369                                  this);
   1370     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
   1371     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
   1372 
   1373     RestoreLiveRegisters(codegen, locations);
   1374     __ B(GetExitLabel());
   1375   }
   1376 
   1377   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
   1378 
   1379  private:
   1380   const Location out_;
   1381   const Location root_;
   1382 
   1383   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
   1384 };
   1385 
   1386 #undef __
   1387 
   1388 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
   1389   Location next_location;
   1390   if (type == Primitive::kPrimVoid) {
   1391     LOG(FATAL) << "Unreachable type " << type;
   1392   }
   1393 
   1394   if (Primitive::IsFloatingPointType(type) &&
   1395       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
   1396     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
   1397   } else if (!Primitive::IsFloatingPointType(type) &&
   1398              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
   1399     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
   1400   } else {
   1401     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
   1402     next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
   1403                                                  : Location::StackSlot(stack_offset);
   1404   }
   1405 
   1406   // Space on the stack is reserved for all arguments.
   1407   stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
   1408   return next_location;
   1409 }
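         // For illustration, a sketch of how arguments are assigned (register
         // names assumed from the invoke calling convention, not generated
         // output): for a signature (jlong, jfloat, jint), the long takes the
         // first core argument register as x1, the float takes the first FP
         // register as s0, and the int takes the second core register as w2;
         // stack_index_ still advances by 2, 1 and 1 respectively, so stack
         // space is reserved even for register-allocated arguments.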
   1410 
   1411 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
   1412   return LocationFrom(kArtMethodRegister);
   1413 }
   1414 
   1415 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
   1416                                        const Arm64InstructionSetFeatures& isa_features,
   1417                                        const CompilerOptions& compiler_options,
   1418                                        OptimizingCompilerStats* stats)
   1419     : CodeGenerator(graph,
   1420                     kNumberOfAllocatableRegisters,
   1421                     kNumberOfAllocatableFPRegisters,
   1422                     kNumberOfAllocatableRegisterPairs,
   1423                     callee_saved_core_registers.GetList(),
   1424                     callee_saved_fp_registers.GetList(),
   1425                     compiler_options,
   1426                     stats),
   1427       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1428       jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1429       location_builder_(graph, this),
   1430       instruction_visitor_(graph, this),
   1431       move_resolver_(graph->GetArena(), this),
   1432       assembler_(graph->GetArena()),
   1433       isa_features_(isa_features),
   1434       uint32_literals_(std::less<uint32_t>(),
   1435                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1436       uint64_literals_(std::less<uint64_t>(),
   1437                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1438       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1439       boot_image_string_patches_(StringReferenceValueComparator(),
   1440                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1441       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1442       boot_image_type_patches_(TypeReferenceValueComparator(),
   1443                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1444       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1445       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1446       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1447       jit_string_patches_(StringReferenceValueComparator(),
   1448                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1449       jit_class_patches_(TypeReferenceValueComparator(),
   1450                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   1451   // Save the link register (containing the return address) to mimic Quick.
   1452   AddAllocatedRegister(LocationFrom(lr));
   1453 }
   1454 
   1455 #define __ GetVIXLAssembler()->
   1456 
   1457 void CodeGeneratorARM64::EmitJumpTables() {
   1458   for (auto&& jump_table : jump_tables_) {
   1459     jump_table->EmitTable(this);
   1460   }
   1461 }
   1462 
   1463 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
   1464   EmitJumpTables();
   1465   // Ensure we emit the literal pool.
   1466   __ FinalizeCode();
   1467 
   1468   CodeGenerator::Finalize(allocator);
   1469 }
   1470 
   1471 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
   1472   // Note: There are 6 kinds of moves:
   1473   // 1. constant -> GPR/FPR (non-cycle)
   1474   // 2. constant -> stack (non-cycle)
   1475   // 3. GPR/FPR -> GPR/FPR
   1476   // 4. GPR/FPR -> stack
   1477   // 5. stack -> GPR/FPR
   1478   // 6. stack -> stack (non-cycle)
    1479   // Cases 1, 2 and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4
    1480   // and 5 VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no
    1481   // intersecting cycles on ARM64, so there is always 1 GPR and 1 FPR temp available to resolve
    1482   // the dependency.
   1483   vixl_temps_.Open(GetVIXLAssembler());
   1484 }
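         // A minimal sketch of why one temp per kind suffices: breaking the
         // core cycle (x0 <-> x1) needs a single core scratch,
         //
         //   mov x16, x0 ; mov x0, x1 ; mov x1, x16
         //
         // and an independent FP cycle can be resolved with the FP temp at
         // the same time, since core and FP moves never block each other.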
   1485 
   1486 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
   1487   vixl_temps_.Close();
   1488 }
   1489 
   1490 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
   1491   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
   1492          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
   1493          || kind == Location::kSIMDStackSlot);
   1494   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
   1495       ? Location::kFpuRegister
   1496       : Location::kRegister;
   1497   Location scratch = GetScratchLocation(kind);
   1498   if (!scratch.Equals(Location::NoLocation())) {
   1499     return scratch;
   1500   }
   1501   // Allocate from VIXL temp registers.
   1502   if (kind == Location::kRegister) {
   1503     scratch = LocationFrom(vixl_temps_.AcquireX());
   1504   } else {
   1505     DCHECK(kind == Location::kFpuRegister);
   1506     scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
   1507         ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
   1508         : vixl_temps_.AcquireD());
   1509   }
   1510   AddScratchLocation(scratch);
   1511   return scratch;
   1512 }
   1513 
   1514 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
   1515   if (loc.IsRegister()) {
   1516     vixl_temps_.Release(XRegisterFrom(loc));
   1517   } else {
   1518     DCHECK(loc.IsFpuRegister());
   1519     vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
   1520   }
   1521   RemoveScratchLocation(loc);
   1522 }
   1523 
   1524 void ParallelMoveResolverARM64::EmitMove(size_t index) {
   1525   MoveOperands* move = moves_[index];
   1526   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
   1527 }
   1528 
   1529 void CodeGeneratorARM64::GenerateFrameEntry() {
   1530   MacroAssembler* masm = GetVIXLAssembler();
   1531   __ Bind(&frame_entry_label_);
   1532 
   1533   bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
   1534   if (do_overflow_check) {
   1535     UseScratchRegisterScope temps(masm);
   1536     Register temp = temps.AcquireX();
   1537     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
   1538     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
   1539     {
   1540       // Ensure that between load and RecordPcInfo there are no pools emitted.
   1541       ExactAssemblyScope eas(GetVIXLAssembler(),
   1542                              kInstructionSize,
   1543                              CodeBufferCheckScope::kExactSize);
   1544       __ ldr(wzr, MemOperand(temp, 0));
   1545       RecordPcInfo(nullptr, 0);
   1546     }
   1547   }
   1548 
   1549   if (!HasEmptyFrame()) {
   1550     int frame_size = GetFrameSize();
   1551     // Stack layout:
   1552     //      sp[frame_size - 8]        : lr.
   1553     //      ...                       : other preserved core registers.
   1554     //      ...                       : other preserved fp registers.
   1555     //      ...                       : reserved frame space.
   1556     //      sp[0]                     : current method.
   1557 
   1558     // Save the current method if we need it. Note that we do not
   1559     // do this in HCurrentMethod, as the instruction might have been removed
   1560     // in the SSA graph.
   1561     if (RequiresCurrentMethod()) {
   1562       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
   1563     } else {
   1564       __ Claim(frame_size);
   1565     }
   1566     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
   1567     GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
   1568         frame_size - GetCoreSpillSize());
   1569     GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
   1570         frame_size - FrameEntrySpillSize());
   1571 
   1572     if (GetGraph()->HasShouldDeoptimizeFlag()) {
   1573       // Initialize should_deoptimize flag to 0.
   1574       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
   1575       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
   1576     }
   1577   }
   1578 }
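         // Worked example (assumed numbers, a sketch): with frame_size == 64
         // and {x20, lr} spilled, the entry stores the ArtMethod* with a
         // single pre-indexed STR that also claims the frame (sp[0]), then
         // spills x20 at sp[48] and lr at sp[56], matching the layout
         // documented above (lr at sp[frame_size - 8]).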
   1579 
   1580 void CodeGeneratorARM64::GenerateFrameExit() {
   1581   GetAssembler()->cfi().RememberState();
   1582   if (!HasEmptyFrame()) {
   1583     int frame_size = GetFrameSize();
   1584     GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
   1585         frame_size - FrameEntrySpillSize());
   1586     GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
   1587         frame_size - GetCoreSpillSize());
   1588     __ Drop(frame_size);
   1589     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
   1590   }
   1591   __ Ret();
   1592   GetAssembler()->cfi().RestoreState();
   1593   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
   1594 }
   1595 
   1596 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
   1597   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
   1598   return CPURegList(CPURegister::kRegister, kXRegSize,
   1599                     core_spill_mask_);
   1600 }
   1601 
   1602 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
   1603   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
   1604                                          GetNumberOfFloatingPointRegisters()));
   1605   return CPURegList(CPURegister::kFPRegister, kDRegSize,
   1606                     fpu_spill_mask_);
   1607 }
   1608 
   1609 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
   1610   __ Bind(GetLabelOf(block));
   1611 }
   1612 
   1613 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
   1614   DCHECK(location.IsRegister());
   1615   __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
   1616 }
   1617 
   1618 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
   1619   if (location.IsRegister()) {
   1620     locations->AddTemp(location);
   1621   } else {
   1622     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
   1623   }
   1624 }
   1625 
   1626 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
   1627   UseScratchRegisterScope temps(GetVIXLAssembler());
   1628   Register card = temps.AcquireX();
   1629   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
   1630   vixl::aarch64::Label done;
   1631   if (value_can_be_null) {
   1632     __ Cbz(value, &done);
   1633   }
   1634   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
   1635   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
   1636   __ Strb(card, MemOperand(card, temp.X()));
   1637   if (value_can_be_null) {
   1638     __ Bind(&done);
   1639   }
   1640 }
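         // In effect (a sketch of the addressing math), the dirtied byte is
         //
         //   card_table_base + (object_address >> gc::accounting::CardTable::kCardShift)
         //
         // and the value stored is the low byte of card_table_base itself,
         // which the runtime treats as the "dirty" marker so the card value
         // and the biased base stay consistent.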
   1641 
   1642 void CodeGeneratorARM64::SetupBlockedRegisters() const {
   1643   // Blocked core registers:
   1644   //      lr        : Runtime reserved.
   1645   //      tr        : Runtime reserved.
   1646   //      xSuspend  : Runtime reserved. TODO: Unblock this when the runtime stops using it.
   1647   //      ip1       : VIXL core temp.
   1648   //      ip0       : VIXL core temp.
   1649   //
   1650   // Blocked fp registers:
   1651   //      d31       : VIXL fp temp.
   1652   CPURegList reserved_core_registers = vixl_reserved_core_registers;
   1653   reserved_core_registers.Combine(runtime_reserved_core_registers);
   1654   while (!reserved_core_registers.IsEmpty()) {
   1655     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
   1656   }
   1657 
   1658   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
   1659   while (!reserved_fp_registers.IsEmpty()) {
   1660     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
   1661   }
   1662 
   1663   if (GetGraph()->IsDebuggable()) {
   1664     // Stubs do not save callee-save floating point registers. If the graph
   1665     // is debuggable, we need to deal with these registers differently. For
   1666     // now, just block them.
   1667     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
   1668     while (!reserved_fp_registers_debuggable.IsEmpty()) {
   1669       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
   1670     }
   1671   }
   1672 }
   1673 
   1674 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   1675   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   1676   __ Str(reg, MemOperand(sp, stack_index));
   1677   return kArm64WordSize;
   1678 }
   1679 
   1680 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
   1681   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   1682   __ Ldr(reg, MemOperand(sp, stack_index));
   1683   return kArm64WordSize;
   1684 }
   1685 
   1686 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   1687   FPRegister reg = FPRegister(reg_id, kDRegSize);
   1688   __ Str(reg, MemOperand(sp, stack_index));
   1689   return kArm64WordSize;
   1690 }
   1691 
   1692 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   1693   FPRegister reg = FPRegister(reg_id, kDRegSize);
   1694   __ Ldr(reg, MemOperand(sp, stack_index));
   1695   return kArm64WordSize;
   1696 }
   1697 
   1698 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
   1699   stream << XRegister(reg);
   1700 }
   1701 
   1702 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
   1703   stream << DRegister(reg);
   1704 }
   1705 
   1706 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
   1707   if (constant->IsIntConstant()) {
   1708     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
   1709   } else if (constant->IsLongConstant()) {
   1710     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
   1711   } else if (constant->IsNullConstant()) {
   1712     __ Mov(Register(destination), 0);
   1713   } else if (constant->IsFloatConstant()) {
   1714     __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
   1715   } else {
   1716     DCHECK(constant->IsDoubleConstant());
   1717     __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
   1718   }
   1719 }
   1720 
   1721 
   1722 static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
   1723   DCHECK(constant.IsConstant());
   1724   HConstant* cst = constant.GetConstant();
   1725   return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
   1726          // Null is mapped to a core W register, which we associate with kPrimInt.
   1727          (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
   1728          (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
   1729          (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
   1730          (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
   1731 }
   1732 
    1733 // Allocate a scratch register from the VIXL pool, querying first
    1734 // the floating-point register pool, and then the core register
    1735 // pool.  This is essentially a reimplementation of
   1736 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
   1737 // using a different allocation strategy.
   1738 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
   1739                                                     vixl::aarch64::UseScratchRegisterScope* temps,
   1740                                                     int size_in_bits) {
   1741   return masm->GetScratchFPRegisterList()->IsEmpty()
   1742       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
   1743       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
   1744 }
   1745 
   1746 void CodeGeneratorARM64::MoveLocation(Location destination,
   1747                                       Location source,
   1748                                       Primitive::Type dst_type) {
   1749   if (source.Equals(destination)) {
   1750     return;
   1751   }
   1752 
   1753   // A valid move can always be inferred from the destination and source
   1754   // locations. When moving from and to a register, the argument type can be
   1755   // used to generate 32bit instead of 64bit moves. In debug mode we also
    1756   // check the coherency of the locations and the type.
   1757   bool unspecified_type = (dst_type == Primitive::kPrimVoid);
   1758 
   1759   if (destination.IsRegister() || destination.IsFpuRegister()) {
   1760     if (unspecified_type) {
   1761       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
   1762       if (source.IsStackSlot() ||
   1763           (src_cst != nullptr && (src_cst->IsIntConstant()
   1764                                   || src_cst->IsFloatConstant()
   1765                                   || src_cst->IsNullConstant()))) {
   1766         // For stack slots and 32bit constants, a 64bit type is appropriate.
   1767         dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
   1768       } else {
   1769         // If the source is a double stack slot or a 64bit constant, a 64bit
   1770         // type is appropriate. Else the source is a register, and since the
    1771       // type has not been specified, we choose a 64bit type to force a 64bit
   1772         // move.
   1773         dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
   1774       }
   1775     }
   1776     DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) ||
   1777            (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type)));
   1778     CPURegister dst = CPURegisterFrom(destination, dst_type);
   1779     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
   1780       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
   1781       __ Ldr(dst, StackOperandFrom(source));
   1782     } else if (source.IsSIMDStackSlot()) {
   1783       __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
   1784     } else if (source.IsConstant()) {
   1785       DCHECK(CoherentConstantAndType(source, dst_type));
   1786       MoveConstant(dst, source.GetConstant());
   1787     } else if (source.IsRegister()) {
   1788       if (destination.IsRegister()) {
   1789         __ Mov(Register(dst), RegisterFrom(source, dst_type));
   1790       } else {
   1791         DCHECK(destination.IsFpuRegister());
   1792         Primitive::Type source_type = Primitive::Is64BitType(dst_type)
   1793             ? Primitive::kPrimLong
   1794             : Primitive::kPrimInt;
   1795         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
   1796       }
   1797     } else {
   1798       DCHECK(source.IsFpuRegister());
   1799       if (destination.IsRegister()) {
   1800         Primitive::Type source_type = Primitive::Is64BitType(dst_type)
   1801             ? Primitive::kPrimDouble
   1802             : Primitive::kPrimFloat;
   1803         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
   1804       } else {
   1805         DCHECK(destination.IsFpuRegister());
   1806         if (GetGraph()->HasSIMD()) {
   1807           __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
   1808         } else {
   1809           __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
   1810         }
   1811       }
   1812     }
   1813   } else if (destination.IsSIMDStackSlot()) {
   1814     if (source.IsFpuRegister()) {
   1815       __ Str(QRegisterFrom(source), StackOperandFrom(destination));
   1816     } else {
   1817       DCHECK(source.IsSIMDStackSlot());
   1818       UseScratchRegisterScope temps(GetVIXLAssembler());
   1819       if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
   1820         Register temp = temps.AcquireX();
   1821         __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
   1822         __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
   1823         __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
   1824         __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
   1825       } else {
   1826         FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
   1827         __ Ldr(temp, StackOperandFrom(source));
   1828         __ Str(temp, StackOperandFrom(destination));
   1829       }
   1830     }
   1831   } else {  // The destination is not a register. It must be a stack slot.
   1832     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
   1833     if (source.IsRegister() || source.IsFpuRegister()) {
   1834       if (unspecified_type) {
   1835         if (source.IsRegister()) {
   1836           dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
   1837         } else {
   1838           dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
   1839         }
   1840       }
   1841       DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) &&
   1842              (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type)));
   1843       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
   1844     } else if (source.IsConstant()) {
   1845       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
   1846           << source << " " << dst_type;
   1847       UseScratchRegisterScope temps(GetVIXLAssembler());
   1848       HConstant* src_cst = source.GetConstant();
   1849       CPURegister temp;
   1850       if (src_cst->IsZeroBitPattern()) {
   1851         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
   1852             ? Register(xzr)
   1853             : Register(wzr);
   1854       } else {
   1855         if (src_cst->IsIntConstant()) {
   1856           temp = temps.AcquireW();
   1857         } else if (src_cst->IsLongConstant()) {
   1858           temp = temps.AcquireX();
   1859         } else if (src_cst->IsFloatConstant()) {
   1860           temp = temps.AcquireS();
   1861         } else {
   1862           DCHECK(src_cst->IsDoubleConstant());
   1863           temp = temps.AcquireD();
   1864         }
   1865         MoveConstant(temp, src_cst);
   1866       }
   1867       __ Str(temp, StackOperandFrom(destination));
   1868     } else {
   1869       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
   1870       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
   1871       UseScratchRegisterScope temps(GetVIXLAssembler());
   1872       // Use any scratch register (a core or a floating-point one)
   1873       // from VIXL scratch register pools as a temporary.
   1874       //
   1875       // We used to only use the FP scratch register pool, but in some
   1876       // rare cases the only register from this pool (D31) would
   1877       // already be used (e.g. within a ParallelMove instruction, when
    1878       // a move is blocked by another move requiring a scratch FP
   1879       // register, which would reserve D31). To prevent this issue, we
   1880       // ask for a scratch register of any type (core or FP).
   1881       //
    1882       // Also, we ask for an FP scratch register first, as the
    1883       // demand for scratch core registers is higher.  This is why we
   1884       // use AcquireFPOrCoreCPURegisterOfSize instead of
   1885       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
   1886       // allocates core scratch registers first.
   1887       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
   1888           GetVIXLAssembler(),
   1889           &temps,
   1890           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
   1891       __ Ldr(temp, StackOperandFrom(source));
   1892       __ Str(temp, StackOperandFrom(destination));
   1893     }
   1894   }
   1895 }
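         // Usage note (a sketch): moving the float constant 0.0f to a stack
         // slot takes the IsZeroBitPattern() path above and stores wzr
         // directly, avoiding both an FP scratch register and an FMOV.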
   1896 
   1897 void CodeGeneratorARM64::Load(Primitive::Type type,
   1898                               CPURegister dst,
   1899                               const MemOperand& src) {
   1900   switch (type) {
   1901     case Primitive::kPrimBoolean:
   1902       __ Ldrb(Register(dst), src);
   1903       break;
   1904     case Primitive::kPrimByte:
   1905       __ Ldrsb(Register(dst), src);
   1906       break;
   1907     case Primitive::kPrimShort:
   1908       __ Ldrsh(Register(dst), src);
   1909       break;
   1910     case Primitive::kPrimChar:
   1911       __ Ldrh(Register(dst), src);
   1912       break;
   1913     case Primitive::kPrimInt:
   1914     case Primitive::kPrimNot:
   1915     case Primitive::kPrimLong:
   1916     case Primitive::kPrimFloat:
   1917     case Primitive::kPrimDouble:
   1918       DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
   1919       __ Ldr(dst, src);
   1920       break;
   1921     case Primitive::kPrimVoid:
   1922       LOG(FATAL) << "Unreachable type " << type;
   1923   }
   1924 }
   1925 
   1926 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
   1927                                      CPURegister dst,
   1928                                      const MemOperand& src,
   1929                                      bool needs_null_check) {
   1930   MacroAssembler* masm = GetVIXLAssembler();
   1931   UseScratchRegisterScope temps(masm);
   1932   Register temp_base = temps.AcquireX();
   1933   Primitive::Type type = instruction->GetType();
   1934 
   1935   DCHECK(!src.IsPreIndex());
   1936   DCHECK(!src.IsPostIndex());
   1937 
   1938   // TODO(vixl): Let the MacroAssembler handle MemOperand.
   1939   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
   1940   {
   1941     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   1942     MemOperand base = MemOperand(temp_base);
   1943     switch (type) {
   1944       case Primitive::kPrimBoolean:
   1945         {
   1946           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1947           __ ldarb(Register(dst), base);
   1948           if (needs_null_check) {
   1949             MaybeRecordImplicitNullCheck(instruction);
   1950           }
   1951         }
   1952         break;
   1953       case Primitive::kPrimByte:
   1954         {
   1955           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1956           __ ldarb(Register(dst), base);
   1957           if (needs_null_check) {
   1958             MaybeRecordImplicitNullCheck(instruction);
   1959           }
   1960         }
   1961         __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
   1962         break;
   1963       case Primitive::kPrimChar:
   1964         {
   1965           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1966           __ ldarh(Register(dst), base);
   1967           if (needs_null_check) {
   1968             MaybeRecordImplicitNullCheck(instruction);
   1969           }
   1970         }
   1971         break;
   1972       case Primitive::kPrimShort:
   1973         {
   1974           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1975           __ ldarh(Register(dst), base);
   1976           if (needs_null_check) {
   1977             MaybeRecordImplicitNullCheck(instruction);
   1978           }
   1979         }
   1980         __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
   1981         break;
   1982       case Primitive::kPrimInt:
   1983       case Primitive::kPrimNot:
   1984       case Primitive::kPrimLong:
   1985         DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
   1986         {
   1987           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   1988           __ ldar(Register(dst), base);
   1989           if (needs_null_check) {
   1990             MaybeRecordImplicitNullCheck(instruction);
   1991           }
   1992         }
   1993         break;
   1994       case Primitive::kPrimFloat:
   1995       case Primitive::kPrimDouble: {
   1996         DCHECK(dst.IsFPRegister());
   1997         DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
   1998 
   1999         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
   2000         {
   2001           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2002           __ ldar(temp, base);
   2003           if (needs_null_check) {
   2004             MaybeRecordImplicitNullCheck(instruction);
   2005           }
   2006         }
   2007         __ Fmov(FPRegister(dst), temp);
   2008         break;
   2009       }
   2010       case Primitive::kPrimVoid:
   2011         LOG(FATAL) << "Unreachable type " << type;
   2012     }
   2013   }
   2014 }
   2015 
   2016 void CodeGeneratorARM64::Store(Primitive::Type type,
   2017                                CPURegister src,
   2018                                const MemOperand& dst) {
   2019   switch (type) {
   2020     case Primitive::kPrimBoolean:
   2021     case Primitive::kPrimByte:
   2022       __ Strb(Register(src), dst);
   2023       break;
   2024     case Primitive::kPrimChar:
   2025     case Primitive::kPrimShort:
   2026       __ Strh(Register(src), dst);
   2027       break;
   2028     case Primitive::kPrimInt:
   2029     case Primitive::kPrimNot:
   2030     case Primitive::kPrimLong:
   2031     case Primitive::kPrimFloat:
   2032     case Primitive::kPrimDouble:
   2033       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
   2034       __ Str(src, dst);
   2035       break;
   2036     case Primitive::kPrimVoid:
   2037       LOG(FATAL) << "Unreachable type " << type;
   2038   }
   2039 }
   2040 
   2041 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
   2042                                       Primitive::Type type,
   2043                                       CPURegister src,
   2044                                       const MemOperand& dst,
   2045                                       bool needs_null_check) {
   2046   MacroAssembler* masm = GetVIXLAssembler();
    2047   UseScratchRegisterScope temps(masm);
   2048   Register temp_base = temps.AcquireX();
   2049 
   2050   DCHECK(!dst.IsPreIndex());
   2051   DCHECK(!dst.IsPostIndex());
   2052 
   2053   // TODO(vixl): Let the MacroAssembler handle this.
   2054   Operand op = OperandFromMemOperand(dst);
   2055   __ Add(temp_base, dst.GetBaseRegister(), op);
   2056   MemOperand base = MemOperand(temp_base);
   2057   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
   2058   switch (type) {
   2059     case Primitive::kPrimBoolean:
   2060     case Primitive::kPrimByte:
   2061       {
   2062         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2063         __ stlrb(Register(src), base);
   2064         if (needs_null_check) {
   2065           MaybeRecordImplicitNullCheck(instruction);
   2066         }
   2067       }
   2068       break;
   2069     case Primitive::kPrimChar:
   2070     case Primitive::kPrimShort:
   2071       {
   2072         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2073         __ stlrh(Register(src), base);
   2074         if (needs_null_check) {
   2075           MaybeRecordImplicitNullCheck(instruction);
   2076         }
   2077       }
   2078       break;
   2079     case Primitive::kPrimInt:
   2080     case Primitive::kPrimNot:
   2081     case Primitive::kPrimLong:
   2082       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
   2083       {
   2084         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2085         __ stlr(Register(src), base);
   2086         if (needs_null_check) {
   2087           MaybeRecordImplicitNullCheck(instruction);
   2088         }
   2089       }
   2090       break;
   2091     case Primitive::kPrimFloat:
   2092     case Primitive::kPrimDouble: {
   2093       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
   2094       Register temp_src;
   2095       if (src.IsZero()) {
   2096         // The zero register is used to avoid synthesizing zero constants.
   2097         temp_src = Register(src);
   2098       } else {
   2099         DCHECK(src.IsFPRegister());
   2100         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
   2101         __ Fmov(temp_src, FPRegister(src));
   2102       }
   2103       {
   2104         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
   2105         __ stlr(temp_src, base);
   2106         if (needs_null_check) {
   2107           MaybeRecordImplicitNullCheck(instruction);
   2108         }
   2109       }
   2110       break;
   2111     }
   2112     case Primitive::kPrimVoid:
   2113       LOG(FATAL) << "Unreachable type " << type;
   2114   }
   2115 }
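         // Together with LoadAcquire, this gives volatile accesses their
         // required ordering without explicit barriers on the fast path; a
         // volatile int store compiles to roughly (a sketch)
         //
         //   add  x16, <base>, #offset
         //   stlr w<src>, [x16]
         //
         // and the matching volatile load uses LDAR.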
   2116 
   2117 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
   2118                                        HInstruction* instruction,
   2119                                        uint32_t dex_pc,
   2120                                        SlowPathCode* slow_path) {
   2121   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
   2122 
   2123   __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
   2124   {
   2125     // Ensure the pc position is recorded immediately after the `blr` instruction.
   2126     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
   2127     __ blr(lr);
   2128     if (EntrypointRequiresStackMap(entrypoint)) {
   2129       RecordPcInfo(instruction, dex_pc, slow_path);
   2130     }
   2131   }
   2132 }
   2133 
   2134 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
   2135                                                              HInstruction* instruction,
   2136                                                              SlowPathCode* slow_path) {
   2137   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
   2138   __ Ldr(lr, MemOperand(tr, entry_point_offset));
   2139   __ Blr(lr);
   2140 }
   2141 
   2142 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
   2143                                                                      Register class_reg) {
   2144   UseScratchRegisterScope temps(GetVIXLAssembler());
   2145   Register temp = temps.AcquireW();
   2146   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
   2147 
   2148   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
   2149   // TODO(vixl): Let the MacroAssembler handle MemOperand.
   2150   __ Add(temp, class_reg, status_offset);
   2151   __ Ldar(temp, HeapOperand(temp));
   2152   __ Cmp(temp, mirror::Class::kStatusInitialized);
   2153   __ B(lt, slow_path->GetEntryLabel());
   2154   __ Bind(slow_path->GetExitLabel());
   2155 }
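         // The emitted sequence is roughly (a sketch; w16 stands in for the
         // acquired VIXL scratch register):
         //
         //   add  w16, <class>, #status_offset
         //   ldar w16, [x16]
         //   cmp  w16, #kStatusInitialized
         //   b.lt <slow path entry>
         //
         // The acquire-ordered load pairs with the release store performed
         // when the class becomes initialized, so <clinit> writes are visible
         // once the comparison succeeds.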
   2156 
   2157 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
   2158   BarrierType type = BarrierAll;
   2159 
   2160   switch (kind) {
   2161     case MemBarrierKind::kAnyAny:
   2162     case MemBarrierKind::kAnyStore: {
   2163       type = BarrierAll;
   2164       break;
   2165     }
   2166     case MemBarrierKind::kLoadAny: {
   2167       type = BarrierReads;
   2168       break;
   2169     }
   2170     case MemBarrierKind::kStoreStore: {
   2171       type = BarrierWrites;
   2172       break;
   2173     }
   2174     default:
   2175       LOG(FATAL) << "Unexpected memory barrier " << kind;
   2176   }
   2177   __ Dmb(InnerShareable, type);
   2178 }
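         // For reference, the mapping above emits (a sketch):
         //   kAnyAny / kAnyStore -> dmb ish
         //   kLoadAny            -> dmb ishld
         //   kStoreStore         -> dmb ishst
         // all in the inner-shareable domain, which is sufficient for
         // ordering between CPUs running ART threads.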
   2179 
   2180 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
   2181                                                          HBasicBlock* successor) {
   2182   SuspendCheckSlowPathARM64* slow_path =
   2183       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
   2184   if (slow_path == nullptr) {
   2185     slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
   2186     instruction->SetSlowPath(slow_path);
   2187     codegen_->AddSlowPath(slow_path);
   2188     if (successor != nullptr) {
   2189       DCHECK(successor->IsLoopHeader());
   2190       codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
   2191     }
   2192   } else {
   2193     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   2194   }
   2195 
   2196   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   2197   Register temp = temps.AcquireW();
   2198 
   2199   __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
   2200   if (successor == nullptr) {
   2201     __ Cbnz(temp, slow_path->GetEntryLabel());
   2202     __ Bind(slow_path->GetReturnLabel());
   2203   } else {
   2204     __ Cbz(temp, codegen_->GetLabelOf(successor));
   2205     __ B(slow_path->GetEntryLabel());
   2206     // slow_path will return to GetLabelOf(successor).
   2207   }
   2208 }
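         // Two shapes are emitted (a sketch): with an explicit successor (a
         // loop header), CBZ branches to the successor when no flags are set
         // and the slow path is entered otherwise, returning to the
         // successor; without one, CBNZ enters the slow path only when a
         // flag is set, and the slow path returns to the code that follows.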

InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
                                                             CodeGeneratorARM64* codegen)
      : InstructionCodeGenerator(graph, codegen),
        assembler_(codegen->GetAssembler()),
        codegen_(codegen) {}

#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
  /* No unimplemented IR. */

#define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode

enum UnimplementedInstructionBreakCode {
  // Using a base helps identify when we hit such breakpoints.
  UnimplementedInstructionBreakCodeBaseCode = 0x900,
#define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name),
  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION)
#undef ENUM_UNIMPLEMENTED_INSTRUCTION
};

#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name)                               \
  void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) {  \
    __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name));                               \
  }                                                                                   \
  void LocationsBuilderARM64::Visit##name(H##name* instr) {                           \
    LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \
    locations->SetOut(Location::Any());                                               \
  }
  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS)
#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS

#undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE
#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION

void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
  DCHECK_EQ(instr->InputCount(), 2U);
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
  Primitive::Type type = instr->GetResultType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;

    default:
      LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
  }
}

void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
                                           const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());

  bool object_field_get_with_read_barrier =
      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction,
                                                   object_field_get_with_read_barrier ?
                                                       LocationSummary::kCallOnSlowPath :
                                                       LocationSummary::kNoCall);
  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
        !Runtime::Current()->UseJitCompilation() &&
        !field_info.IsVolatile()) {
      // If link-time thunks for the Baker read barrier are enabled, for AOT
      // non-volatile loads we need a temporary only if the offset is too big.
      if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
        locations->AddTemp(FixedTempLocation());
      }
    } else {
      locations->AddTemp(Location::RequiresRegister());
    }
  }
  locations->SetInAt(0, Location::RequiresRegister());
  if (Primitive::IsFloatingPointType(instruction->GetType())) {
    locations->SetOut(Location::RequiresFpuRegister());
  } else {
    // The output overlaps for an object field get when read barriers
    // are enabled: we do not want the load to overwrite the object's
    // location, as we need it to emit the read barrier.
    locations->SetOut(
        Location::RequiresRegister(),
        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
                                                   const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
  LocationSummary* locations = instruction->GetLocations();
  Location base_loc = locations->InAt(0);
  Location out = locations->Out();
  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
  Primitive::Type field_type = field_info.GetFieldType();
  MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());

  if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // Object FieldGet with Baker's read barrier case.
    // /* HeapReference<Object> */ out = *(base + offset)
    Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
    Location maybe_temp =
        (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
    // Note that potential implicit null checks are handled in this
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
    codegen_->GenerateFieldLoadWithBakerReadBarrier(
        instruction,
        out,
        base,
        offset,
        maybe_temp,
        /* needs_null_check */ true,
        field_info.IsVolatile());
  } else {
    // General case.
    if (field_info.IsVolatile()) {
      // Note that a potential implicit null check is handled in this
      // CodeGeneratorARM64::LoadAcquire call.
      // NB: LoadAcquire will record the pc info if needed.
      codegen_->LoadAcquire(
          instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
    } else {
      // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      codegen_->Load(field_type, OutputCPURegister(instruction), field);
      codegen_->MaybeRecordImplicitNullCheck(instruction);
    }
    if (field_type == Primitive::kPrimNot) {
      // If read barriers are enabled, emit read barriers other than
      // Baker's using a slow path (and also unpoison the loaded
      // reference, if heap poisoning is enabled).
      codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
    }
  }
}
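
// For volatile loads the general case above defers to LoadAcquire, which is
// expected to use a load-acquire (LDAR-family) instruction, giving the
// acquire half of the Java volatile semantics without a separate fence. A
// sketch for a volatile 32-bit field at a non-zero offset (register names
// illustrative; LDAR accepts only a plain base register):
//
//   add  x_temp, x_base, #<offset>
//   ldar w_out, [x_temp]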

void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
  } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
    locations->SetInAt(1, Location::RequiresFpuRegister());
  } else {
    locations->SetInAt(1, Location::RequiresRegister());
  }
}

void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
                                                   const FieldInfo& field_info,
                                                   bool value_can_be_null) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());

  Register obj = InputRegisterAt(instruction, 0);
  CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
  CPURegister source = value;
  Offset offset = field_info.GetFieldOffset();
  Primitive::Type field_type = field_info.GetFieldType();

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(GetVIXLAssembler());

    if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp, value.W());
      GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

    if (field_info.IsVolatile()) {
      codegen_->StoreRelease(
          instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
    } else {
      // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      codegen_->Store(field_type, source, HeapOperand(obj, offset));
      codegen_->MaybeRecordImplicitNullCheck(instruction);
    }
  }

  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
    codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
  }
}
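
// Volatile stores mirror the loads: StoreRelease is expected to use a
// store-release (STLR-family) instruction for the release half of the
// volatile contract. Reference stores are additionally followed by the GC
// write barrier in MarkGCCard, which roughly computes the card index from
// the object address and dirties that card (register names illustrative):
//
//   lsr  x_temp, x_obj, #<card shift>
//   strb w_card, [x_card_base, x_temp]
//
// with the marking skipped behind a null test when `value_can_be_null`.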

void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
  Primitive::Type type = instr->GetType();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      Register dst = OutputRegister(instr);
      Register lhs = InputRegisterAt(instr, 0);
      Operand rhs = InputOperandAt(instr, 1);
      if (instr->IsAdd()) {
        __ Add(dst, lhs, rhs);
      } else if (instr->IsAnd()) {
        __ And(dst, lhs, rhs);
      } else if (instr->IsOr()) {
        __ Orr(dst, lhs, rhs);
      } else if (instr->IsSub()) {
        __ Sub(dst, lhs, rhs);
      } else if (instr->IsRor()) {
        if (rhs.IsImmediate()) {
          uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
          __ Ror(dst, lhs, shift);
        } else {
          // Ensure the shift distance is in a register of the same size as
          // the result. If we are rotating a long and the shift distance
          // originally arrives in a W register, no sign extension (sxtw) is
          // needed to use it as an X register, since shift distances are
          // always taken modulo the register width (masked with reg_bits - 1).
          __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
        }
      } else {
        DCHECK(instr->IsXor());
        __ Eor(dst, lhs, rhs);
      }
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      FPRegister dst = OutputFPRegister(instr);
      FPRegister lhs = InputFPRegisterAt(instr, 0);
      FPRegister rhs = InputFPRegisterAt(instr, 1);
      if (instr->IsAdd()) {
        __ Fadd(dst, lhs, rhs);
      } else if (instr->IsSub()) {
        __ Fsub(dst, lhs, rhs);
      } else {
        LOG(FATAL) << "Unexpected floating-point binary operation";
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected binary operation type " << type;
  }
}
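
// Worked example for the rotate-by-immediate path above: rotating a long by
// 70 only uses the low six bits of the distance, so 70 & 63 == 6 and a
// single `ror x_dst, x_lhs, #6` suffices. The same modulo rule is what makes
// the register variant safe without extending a W-sized shift distance.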

void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());

  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
  Primitive::Type type = instr->GetResultType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;
    }
    default:
      LOG(FATAL) << "Unexpected shift type " << type;
  }
}

void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());

  Primitive::Type type = instr->GetType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      Register dst = OutputRegister(instr);
      Register lhs = InputRegisterAt(instr, 0);
      Operand rhs = InputOperandAt(instr, 1);
      if (rhs.IsImmediate()) {
        uint32_t shift_value = rhs.GetImmediate() &
            (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
        if (instr->IsShl()) {
          __ Lsl(dst, lhs, shift_value);
        } else if (instr->IsShr()) {
          __ Asr(dst, lhs, shift_value);
        } else {
          __ Lsr(dst, lhs, shift_value);
        }
      } else {
        Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();

        if (instr->IsShl()) {
          __ Lsl(dst, lhs, rhs_reg);
        } else if (instr->IsShr()) {
          __ Asr(dst, lhs, rhs_reg);
        } else {
          __ Lsr(dst, lhs, rhs_reg);
        }
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected shift operation type " << type;
  }
}
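
// The immediate masks above encode the Java shift semantics: the distance is
// taken modulo 32 for ints and modulo 64 for longs, so e.g. an int shift
// `x << 35` becomes `lsl w_dst, w_lhs, #3`. This matches the ARM64
// register-shift instructions, which likewise use only the low 5 or 6 bits
// of the shift register, so the register path needs no extra masking.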

void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
  HandleBinaryOp(instruction);
}

void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
  HandleBinaryOp(instruction);
}

void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
  HandleBinaryOp(instruction);
}

void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
  HandleBinaryOp(instruction);
}

void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
  DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
  locations->SetInAt(0, Location::RequiresRegister());
  // There is no immediate variant of negated bitwise instructions in AArch64.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
  Register dst = OutputRegister(instr);
  Register lhs = InputRegisterAt(instr, 0);
  Register rhs = InputRegisterAt(instr, 1);

  switch (instr->GetOpKind()) {
    case HInstruction::kAnd:
      __ Bic(dst, lhs, rhs);
      break;
    case HInstruction::kOr:
      __ Orn(dst, lhs, rhs);
      break;
    case HInstruction::kXor:
      __ Eon(dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unreachable";
  }
}
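
// HBitwiseNegatedRight folds a NOT of the right-hand operand into the
// operation itself via the ARM64 negated-operand forms:
//
//   a & ~b  ->  bic dst, a, b
//   a | ~b  ->  orn dst, a, b
//   a ^ ~b  ->  eon dst, a, b
//
// one instruction instead of an mvn feeding an and/orr/eor.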

void LocationsBuilderARM64::VisitDataProcWithShifterOp(
    HDataProcWithShifterOp* instruction) {
  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
         instruction->GetType() == Primitive::kPrimLong);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  if (instruction->GetInstrKind() == HInstruction::kNeg) {
    locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
  } else {
    locations->SetInAt(0, Location::RequiresRegister());
  }
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
    HDataProcWithShifterOp* instruction) {
  Primitive::Type type = instruction->GetType();
  HInstruction::InstructionKind kind = instruction->GetInstrKind();
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
  Register out = OutputRegister(instruction);
  Register left;
  if (kind != HInstruction::kNeg) {
    left = InputRegisterAt(instruction, 0);
  }
  // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
  // shifter operand operation, the IR generating `right_reg` (input to the type
  // conversion) can have a different type from the current instruction's type,
  // so we manually indicate the type.
  Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
  Operand right_operand(0);

  HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
  if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
    right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
  } else {
    right_operand = Operand(right_reg,
                            helpers::ShiftFromOpKind(op_kind),
                            instruction->GetShiftAmount());
  }

  // Logical binary operations do not support extended-register operands.
  // Note that VIXL would still cope if one were passed, by generating the
  // extension as a separate instruction.
  // `HNeg` does not support extension either; see the comments in
  // `ShifterOperandSupportsExtension()`.
  DCHECK(!right_operand.IsExtendedRegister() ||
         (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
          kind != HInstruction::kNeg));
  switch (kind) {
    case HInstruction::kAdd:
      __ Add(out, left, right_operand);
      break;
    case HInstruction::kAnd:
      __ And(out, left, right_operand);
      break;
    case HInstruction::kNeg:
      DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
      __ Neg(out, right_operand);
      break;
    case HInstruction::kOr:
      __ Orr(out, left, right_operand);
      break;
    case HInstruction::kSub:
      __ Sub(out, left, right_operand);
      break;
    case HInstruction::kXor:
      __ Eor(out, left, right_operand);
      break;
    default:
      LOG(FATAL) << "Unexpected operation kind: " << kind;
      UNREACHABLE();
  }
}
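
// HDataProcWithShifterOp merges a shift or extension (possibly a merged type
// conversion) into the data-processing instruction that consumes it. For
// example, `a + (b << 5)` can be emitted as the single instruction
// `add x_out, x_a, x_b, lsl #5`, and `a + (long) b_int` as
// `add x_out, x_a, w_b, sxtw`, instead of a separate shift or extension
// feeding a plain add.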

void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
  __ Add(OutputRegister(instruction),
         InputRegisterAt(instruction, 0),
         Operand(InputOperandAt(instruction, 1)));
}
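
// HIntermediateAddress makes the `array base + data offset` computation an
// explicit IR node so it can be hoisted (typically out of a loop), letting
// each access use a register-plus-scaled-register addressing mode. A sketch
// of the resulting loop pattern (register names illustrative):
//
//   add  x_addr, x_array, #<data_offset>    // hoisted, executed once
//   ...
//   ldr  w_out, [x_addr, x_index, lsl #2]   // per-iteration access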

void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
  HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
  if (instr->GetOpKind() == HInstruction::kSub &&
      accumulator->IsConstant() &&
      accumulator->AsConstant()->IsArithmeticZero()) {
    // Don't allocate register for Mneg instruction.
  } else {
    locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
                       Location::RequiresRegister());
  }
  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
  Register res = OutputRegister(instr);
  Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
  Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);

  // Avoid emitting code that could trigger Cortex A53's erratum 835769.
  // This fixup should be carried out for all multiply-accumulate instructions:
  // madd, msub, smaddl, smsubl, umaddl and umsubl.
  if (instr->GetType() == Primitive::kPrimLong &&
      codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
    MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
    vixl::aarch64::Instruction* prev =
        masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
    if (prev->IsLoadOrStore()) {
      // Make sure we emit exactly one nop.
      ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
      __ nop();
    }
  }

  if (instr->GetOpKind() == HInstruction::kAdd) {
    Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
    __ Madd(res, mul_left, mul_right, accumulator);
  } else {
    DCHECK(instr->GetOpKind() == HInstruction::kSub);
    HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
    if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
      __ Mneg(res, mul_left, mul_right);
    } else {
      Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
      __ Msub(res, mul_left, mul_right, accumulator);
    }
  }
}
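
// The three multiply-accumulate forms map as:
//
//   acc + l * r  ->  madd res, l, r, acc
//   acc - l * r  ->  msub res, l, r, acc
//     0 - l * r  ->  mneg res, l, r        // no accumulator register needed
//
// The erratum 835769 workaround above inserts a nop between a 64-bit memory
// access and an immediately following 64-bit multiply-accumulate, the
// sequence that can produce an incorrect result on affected Cortex-A53
// cores.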

void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
  bool object_array_get_with_read_barrier =
      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction,
                                                   object_array_get_with_read_barrier ?
                                                       LocationSummary::kCallOnSlowPath :
                                                       LocationSummary::kNoCall);
  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
        !Runtime::Current()->UseJitCompilation() &&
        instruction->GetIndex()->IsConstant()) {
      // Array loads with constant index are treated as field loads.
      // If link-time thunks for the Baker read barrier are enabled, for AOT
      // constant index loads we need a temporary only if the offset is too big.
      uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
      uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
      offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
      if (offset >= kReferenceLoadMinFarOffset) {
        locations->AddTemp(FixedTempLocation());
      }
    } else {
      locations->AddTemp(Location::RequiresRegister());
    }
  }
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
  if (Primitive::IsFloatingPointType(instruction->GetType())) {
    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
  } else {
    // The output overlaps in the case of an object array get with
    // read barriers enabled: we do not want the move to overwrite the
    // array's location, as we need it to emit the read barrier.
    locations->SetOut(
        Location::RequiresRegister(),
        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
  Primitive::Type type = instruction->GetType();
  Register obj = InputRegisterAt(instruction, 0);
  LocationSummary* locations = instruction->GetLocations();
  Location index = locations->InAt(1);
  Location out = locations->Out();
  uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
  const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
                                        instruction->IsStringCharAt();
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  // The read barrier instrumentation of object ArrayGet instructions
  // does not support the HIntermediateAddress instruction.
  DCHECK(!((type == Primitive::kPrimNot) &&
           instruction->GetArray()->IsIntermediateAddress() &&
           kEmitCompilerReadBarrier));

  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // Object ArrayGet with Baker's read barrier case.
    // Note that a potential implicit null check is handled in the
    // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
    if (index.IsConstant()) {
      // Array load with a constant index can be treated as a field load.
      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
      Location maybe_temp =
          (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      obj.W(),
                                                      offset,
                                                      maybe_temp,
                                                      /* needs_null_check */ true,
                                                      /* use_load_acquire */ false);
    } else {
      Register temp = WRegisterFrom(locations->GetTemp(0));
      codegen_->GenerateArrayLoadWithBakerReadBarrier(
          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
    }
  } else {
    // General case.
    MemOperand source = HeapOperand(obj);
    Register length;
    if (maybe_compressed_char_at) {
      uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
      length = temps.AcquireW();
      {
        // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);

        if (instruction->GetArray()->IsIntermediateAddress()) {
          DCHECK_LT(count_offset, offset);
          int64_t adjusted_offset =
              static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
          // Note that `adjusted_offset` is negative, so this will be a LDUR.
          __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
        } else {
          __ Ldr(length, HeapOperand(obj, count_offset));
        }
        codegen_->MaybeRecordImplicitNullCheck(instruction);
      }
    }
    if (index.IsConstant()) {
      if (maybe_compressed_char_at) {
        vixl::aarch64::Label uncompressed_load, done;
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        __ Tbnz(length.W(), 0, &uncompressed_load);
        __ Ldrb(Register(OutputCPURegister(instruction)),
                HeapOperand(obj, offset + Int64ConstantFrom(index)));
        __ B(&done);
        __ Bind(&uncompressed_load);
        __ Ldrh(Register(OutputCPURegister(instruction)),
                HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
        __ Bind(&done);
      } else {
        offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
        source = HeapOperand(obj, offset);
      }
    } else {
      Register temp = temps.AcquireSameSizeAs(obj);
      if (instruction->GetArray()->IsIntermediateAddress()) {
        // We do not need to compute the intermediate address from the array: the
        // input instruction has done it already. See the comment in
        // `TryExtractArrayAccessAddress()`.
        if (kIsDebugBuild) {
          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
          DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
        }
        temp = obj;
      } else {
        __ Add(temp, obj, offset);
      }
      if (maybe_compressed_char_at) {
        vixl::aarch64::Label uncompressed_load, done;
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        __ Tbnz(length.W(), 0, &uncompressed_load);
        __ Ldrb(Register(OutputCPURegister(instruction)),
                HeapOperand(temp, XRegisterFrom(index), LSL, 0));
        __ B(&done);
        __ Bind(&uncompressed_load);
        __ Ldrh(Register(OutputCPURegister(instruction)),
                HeapOperand(temp, XRegisterFrom(index), LSL, 1));
        __ Bind(&done);
      } else {
        source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
      }
    }
    if (!maybe_compressed_char_at) {
      // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      codegen_->Load(type, OutputCPURegister(instruction), source);
      codegen_->MaybeRecordImplicitNullCheck(instruction);
    }

    if (type == Primitive::kPrimNot) {
      static_assert(
          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
      Location obj_loc = locations->InAt(0);
      if (index.IsConstant()) {
        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
      } else {
        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
      }
    }
  }
}

void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
  vixl::aarch64::Register out = OutputRegister(instruction);
  {
    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
    __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
    codegen_->MaybeRecordImplicitNullCheck(instruction);
  }
  // Mask out compression flag from String's array length.
  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
    __ Lsr(out.W(), out.W(), 1u);
  }
}
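
// With string compression, String.count holds `(char_count << 1) | flag` in
// one 32-bit field, where flag bit 0 is 0 for compressed (8-bit) and 1 for
// uncompressed (16-bit) contents, as the static_asserts above expect. Hence
// String.length() is simply `lsr w_out, w_count, #1`, and charAt() tests bit
// 0 (TBNZ) to choose between an LDRB at `index` and an LDRH at `index * 2`.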

void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
  Primitive::Type value_type = instruction->GetComponentType();

  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
      instruction,
      may_need_runtime_call_for_type_check ?
          LocationSummary::kCallOnSlowPath :
          LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
  if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
  } else if (Primitive::IsFloatingPointType(value_type)) {
    locations->SetInAt(2, Location::RequiresFpuRegister());
  } else {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
  Primitive::Type value_type = instruction->GetComponentType();
  LocationSummary* locations = instruction->GetLocations();
  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());

  Register array = InputRegisterAt(instruction, 0);
  CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
  CPURegister source = value;
  Location index = locations->InAt(1);
  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
  MemOperand destination = HeapOperand(array);
  MacroAssembler* masm = GetVIXLAssembler();

  if (!needs_write_barrier) {
    DCHECK(!may_need_runtime_call_for_type_check);
    if (index.IsConstant()) {
      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
      destination = HeapOperand(array, offset);
    } else {
      UseScratchRegisterScope temps(masm);
      Register temp = temps.AcquireSameSizeAs(array);
      if (instruction->GetArray()->IsIntermediateAddress()) {
        // We do not need to compute the intermediate address from the array: the
        // input instruction has done it already. See the comment in
        // `TryExtractArrayAccessAddress()`.
        if (kIsDebugBuild) {
          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
          DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
        }
        temp = array;
      } else {
        __ Add(temp, array, offset);
      }
      destination = HeapOperand(temp,
                                XRegisterFrom(index),
                                LSL,
                                Primitive::ComponentSizeShift(value_type));
    }
    {
      // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      codegen_->Store(value_type, value, destination);
      codegen_->MaybeRecordImplicitNullCheck(instruction);
    }
  } else {
    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
    vixl::aarch64::Label done;
    SlowPathCodeARM64* slow_path = nullptr;
    {
      // We use a block to end the scratch scope before the write barrier, thus
      // freeing the temporary registers so they can be used in `MarkGCCard`.
      UseScratchRegisterScope temps(masm);
      Register temp = temps.AcquireSameSizeAs(array);
      if (index.IsConstant()) {
        offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
        destination = HeapOperand(array, offset);
      } else {
        destination = HeapOperand(temp,
                                  XRegisterFrom(index),
                                  LSL,
                                  Primitive::ComponentSizeShift(value_type));
      }

      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();

      if (may_need_runtime_call_for_type_check) {
        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
        codegen_->AddSlowPath(slow_path);
        if (instruction->GetValueCanBeNull()) {
          vixl::aarch64::Label non_zero;
          __ Cbnz(Register(value), &non_zero);
          if (!index.IsConstant()) {
            __ Add(temp, array, offset);
          }
          {
            // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
            // emitted.
            EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
            __ Str(wzr, destination);
            codegen_->MaybeRecordImplicitNullCheck(instruction);
          }
          __ B(&done);
          __ Bind(&non_zero);
        }

        // Note that when Baker read barriers are enabled, the type
        // checks are performed without read barriers.  This is fine,
        // even in the case where a class object is in the from-space
        // after the flip, as a comparison involving such a type would
        // not produce a false positive; it may of course produce a
        // false negative, in which case we would take the ArraySet
        // slow path.

        Register temp2 = temps.AcquireSameSizeAs(array);
        // /* HeapReference<Class> */ temp = array->klass_
        {
          // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
          __ Ldr(temp, HeapOperand(array, class_offset));
          codegen_->MaybeRecordImplicitNullCheck(instruction);
        }
        GetAssembler()->MaybeUnpoisonHeapReference(temp);

        // /* HeapReference<Class> */ temp = temp->component_type_
        __ Ldr(temp, HeapOperand(temp, component_offset));
        // /* HeapReference<Class> */ temp2 = value->klass_
        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
        // If heap poisoning is enabled, no need to unpoison `temp`
        // nor `temp2`, as we are comparing two poisoned references.
        __ Cmp(temp, temp2);
        temps.Release(temp2);

        if (instruction->StaticTypeOfArrayIsObjectArray()) {
          vixl::aarch64::Label do_put;
          __ B(eq, &do_put);
          // If heap poisoning is enabled, the `temp` reference has
          // not been unpoisoned yet; unpoison it now.
          GetAssembler()->MaybeUnpoisonHeapReference(temp);

          // /* HeapReference<Class> */ temp = temp->super_class_
          __ Ldr(temp, HeapOperand(temp, super_offset));
          // If heap poisoning is enabled, no need to unpoison
          // `temp`, as we are comparing against null below.
          __ Cbnz(temp, slow_path->GetEntryLabel());
          __ Bind(&do_put);
        } else {
          __ B(ne, slow_path->GetEntryLabel());
        }
      }

      if (kPoisonHeapReferences) {
        Register temp2 = temps.AcquireSameSizeAs(array);
        DCHECK(value.IsW());
        __ Mov(temp2, value.W());
        GetAssembler()->PoisonHeapReference(temp2);
        source = temp2;
      }

      if (!index.IsConstant()) {
        __ Add(temp, array, offset);
      } else {
        // We no longer need `temp` here, so release it: the store below may
        // need a scratch register (if the constant index makes the offset too
        // large), and the poisoned `source` could be occupying the other
        // scratch register.
        temps.Release(temp);
      }
      {
        // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
        __ Str(source, destination);

        if (!may_need_runtime_call_for_type_check) {
          codegen_->MaybeRecordImplicitNullCheck(instruction);
        }
      }
    }

    codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());

    if (done.IsLinked()) {
      __ Bind(&done);
    }

    if (slow_path != nullptr) {
      __ Bind(slow_path->GetExitLabel());
    }
  }
}

void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
  RegisterSet caller_saves = RegisterSet::Empty();
  InvokeRuntimeCallingConvention calling_convention;
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
}

void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
  BoundsCheckSlowPathARM64* slow_path =
      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
  codegen_->AddSlowPath(slow_path);
  __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
  __ B(slow_path->GetEntryLabel(), hs);
}
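
// The bounds check uses the classic unsigned-comparison trick: branching on
// `hs` (unsigned >=) after comparing index against length catches both
// `index >= length` and negative indices in a single branch, since a
// negative index reinterpreted as unsigned is a huge value. In effect:
//
//   cmp  w_index, w_length
//   b.hs <BoundsCheckSlowPath>   // taken unless 0 <= index < length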

void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
  locations->SetInAt(0, Location::RequiresRegister());
  if (check->HasUses()) {
    locations->SetOut(Location::SameAsFirstInput());
  }
}

void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
  // We assume the class is not null.
  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
      check->GetLoadClass(), check, check->GetDexPc(), true);
  codegen_->AddSlowPath(slow_path);
  GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
}

static bool IsFloatingPointZeroConstant(HInstruction* inst) {
  return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
      || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
}

void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
  FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
  Location rhs_loc = instruction->GetLocations()->InAt(1);
  if (rhs_loc.IsConstant()) {
    // 0.0 is the only immediate that can be encoded directly in
    // an FCMP instruction.
    //
    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
    // specify that in a floating-point comparison, positive zero
    // and negative zero are considered equal, so we can use the
    // literal 0.0 for both cases here.
    //
    // Note however that some methods (Float.equals, Float.compare,
    // Float.compareTo, Double.equals, Double.compare,
    // Double.compareTo, Math.max, Math.min, StrictMath.max,
    // StrictMath.min) consider 0.0 to be (strictly) greater than
    // -0.0. So if we ever translate calls to these methods into a
    // HCompare instruction, we must handle the -0.0 case with
    // care here.
    DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
    __ Fcmp(lhs_reg, 0.0);
  } else {
    __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
  }
}

void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
  Primitive::Type in_type = compare->InputAt(0)->GetType();
  switch (in_type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimShort:
    case Primitive::kPrimChar:
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1,
                         IsFloatingPointZeroConstant(compare->InputAt(1))
                             ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
                             : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for compare operation " << in_type;
  }
}

void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
  Primitive::Type in_type = compare->InputAt(0)->GetType();

  //  0 if: left == right
  //  1 if: left  > right
  // -1 if: left  < right
  switch (in_type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimShort:
    case Primitive::kPrimChar:
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      Register result = OutputRegister(compare);
      Register left = InputRegisterAt(compare, 0);
      Operand right = InputOperandAt(compare, 1);
      __ Cmp(left, right);
      __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
      __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      Register result = OutputRegister(compare);
      GenerateFcmp(compare);
      __ Cset(result, ne);
      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
      break;
    }
    default:
      LOG(FATAL) << "Unimplemented compare type " << in_type;
  }
}
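
// The integer compare above computes the -1/0/+1 result without branches:
//
//   cmp  lhs, rhs
//   cset res, ne         // res = (lhs != rhs) ? 1 : 0
//   cneg res, res, lt    // res = (lhs < rhs) ? -res : res
//
// The floating-point variant is the same except that the final condition
// comes from ARM64FPCondition, which folds in the gt/lt bias determining how
// an unordered (NaN) comparison is classified.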

void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);

  if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetInAt(1,
                       IsFloatingPointZeroConstant(instruction->InputAt(1))
                           ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
                           : Location::RequiresFpuRegister());
  } else {
    // Integer cases.
    locations->SetInAt(0, Location::RequiresRegister());
    locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
  }

  if (!instruction->IsEmittedAtUseSite()) {
    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
  if (instruction->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = instruction->GetLocations();
  Register res = RegisterFrom(locations->Out(), instruction->GetType());
  IfCondition if_cond = instruction->GetCondition();

  if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
    GenerateFcmp(instruction);
    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
  } else {
    // Integer cases.
    Register lhs = InputRegisterAt(instruction, 0);
    Operand rhs = InputOperandAt(instruction, 1);
    __ Cmp(lhs, rhs);
    __ Cset(res, ARM64Condition(if_cond));
  }
}

#define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
  M(Equal)                                                                               \
  M(NotEqual)                                                                            \
  M(LessThan)                                                                            \
  M(LessThanOrEqual)                                                                     \
  M(GreaterThan)                                                                         \
  M(GreaterThanOrEqual)                                                                  \
  M(Below)                                                                               \
  M(BelowOrEqual)                                                                        \
  M(Above)                                                                               \
  M(AboveOrEqual)
#define DEFINE_CONDITION_VISITORS(Name)                                                  \
void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
#undef DEFINE_CONDITION_VISITORS
#undef FOR_EACH_CONDITION_INSTRUCTION

void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);
  int64_t imm = Int64FromConstant(second.GetConstant());
  DCHECK(imm == 1 || imm == -1);

  if (instruction->IsRem()) {
    __ Mov(out, 0);
  } else {
    if (imm == 1) {
      __ Mov(out, dividend);
    } else {
      __ Neg(out, dividend);
    }
  }
}

void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);
  int64_t imm = Int64FromConstant(second.GetConstant());
  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
  int ctz_imm = CTZ(abs_imm);

  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register temp = temps.AcquireSameSizeAs(out);

  if (instruction->IsDiv()) {
    __ Add(temp, dividend, abs_imm - 1);
    __ Cmp(dividend, 0);
    __ Csel(out, temp, dividend, lt);
    if (imm > 0) {
      __ Asr(out, out, ctz_imm);
    } else {
      __ Neg(out, Operand(out, ASR, ctz_imm));
    }
  } else {
    int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64;
    __ Asr(temp, dividend, bits - 1);
    __ Lsr(temp, temp, bits - ctz_imm);
    __ Add(out, dividend, temp);
    __ And(out, out, abs_imm - 1);
    __ Sub(out, out, temp);
  }
}
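
// DivRemByPowerOfTwo implements branch-free Java (round-toward-zero)
// division. For |divisor| = 2^k, an arithmetic shift alone would round
// toward negative infinity, so 2^k - 1 is added first, but only for negative
// dividends (the csel keeps the unbiased value otherwise). Worked example
// for divisor 4 (k = 2) and dividend -7:
//
//   temp = -7 + 3 = -4;  -4 >> 2 = -1    // -7 / 4 == -1 in Java,
//                                        // while -7 >> 2 alone gives -2.
//
// The rem path applies the same bias via the add/and/sub sequence on the low
// k bits.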
   3336 
   3337 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
   3338   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3339 
   3340   LocationSummary* locations = instruction->GetLocations();
   3341   Location second = locations->InAt(1);
   3342   DCHECK(second.IsConstant());
   3343 
   3344   Register out = OutputRegister(instruction);
   3345   Register dividend = InputRegisterAt(instruction, 0);
   3346   int64_t imm = Int64FromConstant(second.GetConstant());
   3347 
   3348   Primitive::Type type = instruction->GetResultType();
   3349   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
   3350 
   3351   int64_t magic;
   3352   int shift;
   3353   CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift);
   3354 
   3355   UseScratchRegisterScope temps(GetVIXLAssembler());
   3356   Register temp = temps.AcquireSameSizeAs(out);
   3357 
   3358   // temp = get_high(dividend * magic)
   3359   __ Mov(temp, magic);
   3360   if (type == Primitive::kPrimLong) {
   3361     __ Smulh(temp, dividend, temp);
   3362   } else {
   3363     __ Smull(temp.X(), dividend, temp);
   3364     __ Lsr(temp.X(), temp.X(), 32);
   3365   }
   3366 
   3367   if (imm > 0 && magic < 0) {
   3368     __ Add(temp, temp, dividend);
   3369   } else if (imm < 0 && magic > 0) {
   3370     __ Sub(temp, temp, dividend);
   3371   }
   3372 
   3373   if (shift != 0) {
   3374     __ Asr(temp, temp, shift);
   3375   }
   3376 
   3377   if (instruction->IsDiv()) {
   3378     __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
   3379   } else {
   3380     __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
   3381     // TODO: Strength reduction for msub.
   3382     Register temp_imm = temps.AcquireSameSizeAs(out);
   3383     __ Mov(temp_imm, imm);
   3384     __ Msub(out, temp, temp_imm, dividend);
   3385   }
   3386 }
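
         // As a concrete, illustrative instance: for a 32-bit division by 7,
         // CalculateMagicAndShiftForDivRem produces the usual Hacker's Delight constants
         // magic = 0x92492493 and shift = 2, so the code above emits approximately
         // (scratch register names are an assumption; VIXL picks them and may need more
         // than one instruction for the Mov):
         //   mov   w16, #0x92492493
         //   smull x16, w_dividend, w16
         //   lsr   x16, x16, #32               // temp = high half of the 64-bit product
         //   add   w16, w16, w_dividend        // imm > 0 && magic < 0
         //   asr   w16, w16, #2                // shift
         //   sub   w_out, w16, w16, asr #31    // add 1 to negative quotients
         // The Rem flavor instead ends with out = dividend - quotient * 7 via Msub.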
   3387 
   3388 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   3389   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3390   Primitive::Type type = instruction->GetResultType();
   3391   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
   3392 
   3393   LocationSummary* locations = instruction->GetLocations();
   3394   Register out = OutputRegister(instruction);
   3395   Location second = locations->InAt(1);
   3396 
   3397   if (second.IsConstant()) {
   3398     int64_t imm = Int64FromConstant(second.GetConstant());
   3399 
   3400     if (imm == 0) {
    3401       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
   3402     } else if (imm == 1 || imm == -1) {
   3403       DivRemOneOrMinusOne(instruction);
   3404     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
   3405       DivRemByPowerOfTwo(instruction);
   3406     } else {
   3407       DCHECK(imm <= -2 || imm >= 2);
   3408       GenerateDivRemWithAnyConstant(instruction);
   3409     }
   3410   } else {
   3411     Register dividend = InputRegisterAt(instruction, 0);
   3412     Register divisor = InputRegisterAt(instruction, 1);
   3413     if (instruction->IsDiv()) {
   3414       __ Sdiv(out, dividend, divisor);
   3415     } else {
   3416       UseScratchRegisterScope temps(GetVIXLAssembler());
   3417       Register temp = temps.AcquireSameSizeAs(out);
   3418       __ Sdiv(temp, dividend, divisor);
   3419       __ Msub(out, temp, divisor, dividend);
   3420     }
   3421   }
   3422 }
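
         // The non-constant Rem path above relies on the identity a % b == a - (a / b) * b,
         // which Msub evaluates in a single instruction: out = dividend - temp * divisor.
         // For example, -7 % 3 gives temp = sdiv(-7, 3) = -2 and out = -7 - (-2 * 3) = -1,
         // matching the Java rule that the remainder takes the sign of the dividend.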
   3423 
   3424 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
   3425   LocationSummary* locations =
   3426       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
   3427   switch (div->GetResultType()) {
   3428     case Primitive::kPrimInt:
   3429     case Primitive::kPrimLong:
   3430       locations->SetInAt(0, Location::RequiresRegister());
   3431       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
   3432       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3433       break;
   3434 
   3435     case Primitive::kPrimFloat:
   3436     case Primitive::kPrimDouble:
   3437       locations->SetInAt(0, Location::RequiresFpuRegister());
   3438       locations->SetInAt(1, Location::RequiresFpuRegister());
   3439       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   3440       break;
   3441 
   3442     default:
   3443       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   3444   }
   3445 }
   3446 
   3447 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
   3448   Primitive::Type type = div->GetResultType();
   3449   switch (type) {
   3450     case Primitive::kPrimInt:
   3451     case Primitive::kPrimLong:
   3452       GenerateDivRemIntegral(div);
   3453       break;
   3454 
   3455     case Primitive::kPrimFloat:
   3456     case Primitive::kPrimDouble:
   3457       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
   3458       break;
   3459 
   3460     default:
   3461       LOG(FATAL) << "Unexpected div type " << type;
   3462   }
   3463 }
   3464 
   3465 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3466   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   3467   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   3468 }
   3469 
   3470 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3471   SlowPathCodeARM64* slow_path =
   3472       new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction);
   3473   codegen_->AddSlowPath(slow_path);
   3474   Location value = instruction->GetLocations()->InAt(0);
   3475 
   3476   Primitive::Type type = instruction->GetType();
   3477 
   3478   if (!Primitive::IsIntegralType(type)) {
   3479     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
   3480     return;
   3481   }
   3482 
   3483   if (value.IsConstant()) {
   3484     int64_t divisor = Int64ConstantFrom(value);
   3485     if (divisor == 0) {
   3486       __ B(slow_path->GetEntryLabel());
   3487     } else {
    3488       // A division by a non-zero constant is valid. We don't need to perform
   3489       // any check, so simply fall through.
   3490     }
   3491   } else {
   3492     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
   3493   }
   3494 }
   3495 
   3496 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
   3497   LocationSummary* locations =
   3498       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   3499   locations->SetOut(Location::ConstantLocation(constant));
   3500 }
   3501 
   3502 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
   3503     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   3504   // Will be generated at use site.
   3505 }
   3506 
   3507 void LocationsBuilderARM64::VisitExit(HExit* exit) {
   3508   exit->SetLocations(nullptr);
   3509 }
   3510 
   3511 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
   3512 }
   3513 
   3514 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
   3515   LocationSummary* locations =
   3516       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   3517   locations->SetOut(Location::ConstantLocation(constant));
   3518 }
   3519 
   3520 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   3521   // Will be generated at use site.
   3522 }
   3523 
   3524 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   3525   DCHECK(!successor->IsExitBlock());
   3526   HBasicBlock* block = got->GetBlock();
   3527   HInstruction* previous = got->GetPrevious();
   3528   HLoopInformation* info = block->GetLoopInformation();
   3529 
   3530   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
   3531     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
   3532     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
   3533     return;
   3534   }
   3535   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
   3536     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
   3537   }
   3538   if (!codegen_->GoesToNextBlock(block, successor)) {
   3539     __ B(codegen_->GetLabelOf(successor));
   3540   }
   3541 }
   3542 
   3543 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
   3544   got->SetLocations(nullptr);
   3545 }
   3546 
   3547 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
   3548   HandleGoto(got, got->GetSuccessor());
   3549 }
   3550 
   3551 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
   3552   try_boundary->SetLocations(nullptr);
   3553 }
   3554 
   3555 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
   3556   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
   3557   if (!successor->IsExitBlock()) {
   3558     HandleGoto(try_boundary, successor);
   3559   }
   3560 }
   3561 
   3562 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
   3563                                                           size_t condition_input_index,
   3564                                                           vixl::aarch64::Label* true_target,
   3565                                                           vixl::aarch64::Label* false_target) {
   3566   HInstruction* cond = instruction->InputAt(condition_input_index);
   3567 
   3568   if (true_target == nullptr && false_target == nullptr) {
   3569     // Nothing to do. The code always falls through.
   3570     return;
   3571   } else if (cond->IsIntConstant()) {
   3572     // Constant condition, statically compared against "true" (integer value 1).
   3573     if (cond->AsIntConstant()->IsTrue()) {
   3574       if (true_target != nullptr) {
   3575         __ B(true_target);
   3576       }
   3577     } else {
   3578       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
   3579       if (false_target != nullptr) {
   3580         __ B(false_target);
   3581       }
   3582     }
   3583     return;
   3584   }
   3585 
   3586   // The following code generates these patterns:
   3587   //  (1) true_target == nullptr && false_target != nullptr
   3588   //        - opposite condition true => branch to false_target
   3589   //  (2) true_target != nullptr && false_target == nullptr
   3590   //        - condition true => branch to true_target
   3591   //  (3) true_target != nullptr && false_target != nullptr
   3592   //        - condition true => branch to true_target
   3593   //        - branch to false_target
   3594   if (IsBooleanValueOrMaterializedCondition(cond)) {
   3595     // The condition instruction has been materialized, compare the output to 0.
   3596     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
   3597     DCHECK(cond_val.IsRegister());
    3598     if (true_target == nullptr) {
   3599       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
   3600     } else {
   3601       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
   3602     }
   3603   } else {
   3604     // The condition instruction has not been materialized, use its inputs as
   3605     // the comparison and its condition as the branch condition.
   3606     HCondition* condition = cond->AsCondition();
   3607 
   3608     Primitive::Type type = condition->InputAt(0)->GetType();
   3609     if (Primitive::IsFloatingPointType(type)) {
   3610       GenerateFcmp(condition);
   3611       if (true_target == nullptr) {
   3612         IfCondition opposite_condition = condition->GetOppositeCondition();
   3613         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
   3614       } else {
   3615         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
   3616       }
   3617     } else {
   3618       // Integer cases.
   3619       Register lhs = InputRegisterAt(condition, 0);
   3620       Operand rhs = InputOperandAt(condition, 1);
   3621 
   3622       Condition arm64_cond;
   3623       vixl::aarch64::Label* non_fallthrough_target;
   3624       if (true_target == nullptr) {
   3625         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
   3626         non_fallthrough_target = false_target;
   3627       } else {
   3628         arm64_cond = ARM64Condition(condition->GetCondition());
   3629         non_fallthrough_target = true_target;
   3630       }
   3631 
   3632       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
   3633           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
   3634         switch (arm64_cond) {
   3635           case eq:
   3636             __ Cbz(lhs, non_fallthrough_target);
   3637             break;
   3638           case ne:
   3639             __ Cbnz(lhs, non_fallthrough_target);
   3640             break;
   3641           case lt:
   3642             // Test the sign bit and branch accordingly.
   3643             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
   3644             break;
   3645           case ge:
   3646             // Test the sign bit and branch accordingly.
   3647             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
   3648             break;
   3649           default:
   3650             // Without the `static_cast` the compiler throws an error for
   3651             // `-Werror=sign-promo`.
   3652             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
   3653         }
   3654       } else {
   3655         __ Cmp(lhs, rhs);
   3656         __ B(arm64_cond, non_fallthrough_target);
   3657       }
   3658     }
   3659   }
   3660 
   3661   // If neither branch falls through (case 3), the conditional branch to `true_target`
   3662   // was already emitted (case 2) and we need to emit a jump to `false_target`.
   3663   if (true_target != nullptr && false_target != nullptr) {
   3664     __ B(false_target);
   3665   }
   3666 }
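
         // For illustration: comparisons against zero fold the branch into a single
         // instruction where possible. With a W-register `lhs`, `if (x < 0)` becomes
         //   tbnz w_lhs, #31, target       // branch on the sign bit
         // `if (x == 0)` becomes `cbz w_lhs, target`, and any other (condition, rhs)
         // pair falls back to the generic `cmp lhs, rhs` plus `b.<cond> target`.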
   3667 
   3668 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
   3669   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
   3670   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
   3671     locations->SetInAt(0, Location::RequiresRegister());
   3672   }
   3673 }
   3674 
   3675 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
   3676   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   3677   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
   3678   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
   3679   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
   3680     true_target = nullptr;
   3681   }
   3682   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
   3683   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
   3684     false_target = nullptr;
   3685   }
   3686   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
   3687 }
   3688 
   3689 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
   3690   LocationSummary* locations = new (GetGraph()->GetArena())
   3691       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   3692   InvokeRuntimeCallingConvention calling_convention;
   3693   RegisterSet caller_saves = RegisterSet::Empty();
   3694   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
   3695   locations->SetCustomSlowPathCallerSaves(caller_saves);
   3696   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
   3697     locations->SetInAt(0, Location::RequiresRegister());
   3698   }
   3699 }
   3700 
   3701 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
   3702   SlowPathCodeARM64* slow_path =
   3703       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
   3704   GenerateTestAndBranch(deoptimize,
   3705                         /* condition_input_index */ 0,
   3706                         slow_path->GetEntryLabel(),
   3707                         /* false_target */ nullptr);
   3708 }
   3709 
   3710 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
   3711   LocationSummary* locations = new (GetGraph()->GetArena())
   3712       LocationSummary(flag, LocationSummary::kNoCall);
   3713   locations->SetOut(Location::RequiresRegister());
   3714 }
   3715 
   3716 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
   3717   __ Ldr(OutputRegister(flag),
   3718          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
   3719 }
   3720 
   3721 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
   3722   return condition->IsCondition() &&
   3723          Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
   3724 }
   3725 
   3726 static inline Condition GetConditionForSelect(HCondition* condition) {
   3727   IfCondition cond = condition->AsCondition()->GetCondition();
   3728   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
   3729                                                      : ARM64Condition(cond);
   3730 }
   3731 
   3732 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
   3733   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   3734   if (Primitive::IsFloatingPointType(select->GetType())) {
   3735     locations->SetInAt(0, Location::RequiresFpuRegister());
   3736     locations->SetInAt(1, Location::RequiresFpuRegister());
   3737     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   3738   } else {
   3739     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
   3740     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
   3741     bool is_true_value_constant = cst_true_value != nullptr;
   3742     bool is_false_value_constant = cst_false_value != nullptr;
   3743     // Ask VIXL whether we should synthesize constants in registers.
   3744     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
   3745     Operand true_op = is_true_value_constant ?
   3746         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
   3747     Operand false_op = is_false_value_constant ?
   3748         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
   3749     bool true_value_in_register = false;
   3750     bool false_value_in_register = false;
   3751     MacroAssembler::GetCselSynthesisInformation(
   3752         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
   3753     true_value_in_register |= !is_true_value_constant;
   3754     false_value_in_register |= !is_false_value_constant;
   3755 
   3756     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
   3757                                                  : Location::ConstantLocation(cst_true_value));
   3758     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
   3759                                                   : Location::ConstantLocation(cst_false_value));
   3760     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3761   }
   3762 
   3763   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
   3764     locations->SetInAt(2, Location::RequiresRegister());
   3765   }
   3766 }
   3767 
   3768 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
   3769   HInstruction* cond = select->GetCondition();
   3770   Condition csel_cond;
   3771 
   3772   if (IsBooleanValueOrMaterializedCondition(cond)) {
   3773     if (cond->IsCondition() && cond->GetNext() == select) {
   3774       // Use the condition flags set by the previous instruction.
   3775       csel_cond = GetConditionForSelect(cond->AsCondition());
   3776     } else {
   3777       __ Cmp(InputRegisterAt(select, 2), 0);
   3778       csel_cond = ne;
   3779     }
   3780   } else if (IsConditionOnFloatingPointValues(cond)) {
   3781     GenerateFcmp(cond);
   3782     csel_cond = GetConditionForSelect(cond->AsCondition());
   3783   } else {
   3784     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
   3785     csel_cond = GetConditionForSelect(cond->AsCondition());
   3786   }
   3787 
   3788   if (Primitive::IsFloatingPointType(select->GetType())) {
   3789     __ Fcsel(OutputFPRegister(select),
   3790              InputFPRegisterAt(select, 1),
   3791              InputFPRegisterAt(select, 0),
   3792              csel_cond);
   3793   } else {
   3794     __ Csel(OutputRegister(select),
   3795             InputOperandAt(select, 1),
   3796             InputOperandAt(select, 0),
   3797             csel_cond);
   3798   }
   3799 }
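
         // For illustration: an integer select `cond ? a : b` with a materialized
         // condition lowers to roughly
         //   cmp  w_cond, #0
         //   csel w_out, w_a, w_b, ne
         // and when the HCondition immediately precedes the select, the Cmp is omitted
         // and the flags it has already set are consumed directly.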
   3800 
   3801 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   3802   new (GetGraph()->GetArena()) LocationSummary(info);
   3803 }
   3804 
   3805 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
   3806   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
   3807 }
   3808 
   3809 void CodeGeneratorARM64::GenerateNop() {
   3810   __ Nop();
   3811 }
   3812 
   3813 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   3814   HandleFieldGet(instruction, instruction->GetFieldInfo());
   3815 }
   3816 
   3817 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   3818   HandleFieldGet(instruction, instruction->GetFieldInfo());
   3819 }
   3820 
   3821 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   3822   HandleFieldSet(instruction);
   3823 }
   3824 
   3825 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   3826   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   3827 }
   3828 
   3829 // Temp is used for read barrier.
   3830 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
   3831   if (kEmitCompilerReadBarrier &&
   3832       (kUseBakerReadBarrier ||
   3833           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   3834           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   3835           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
   3836     return 1;
   3837   }
   3838   return 0;
   3839 }
   3840 
    3841 // The interface check case has 3 temps: one for holding the number of interfaces, one for the
    3842 // current interface pointer, and one for loading the current interface.
   3843 // The other checks have one temp for loading the object's class.
   3844 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
   3845   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
   3846     return 3;
   3847   }
   3848   return 1 + NumberOfInstanceOfTemps(type_check_kind);
   3849 }
   3850 
   3851 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   3852   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   3853   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   3854   bool baker_read_barrier_slow_path = false;
   3855   switch (type_check_kind) {
   3856     case TypeCheckKind::kExactCheck:
   3857     case TypeCheckKind::kAbstractClassCheck:
   3858     case TypeCheckKind::kClassHierarchyCheck:
   3859     case TypeCheckKind::kArrayObjectCheck:
   3860       call_kind =
   3861           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
   3862       baker_read_barrier_slow_path = kUseBakerReadBarrier;
   3863       break;
   3864     case TypeCheckKind::kArrayCheck:
   3865     case TypeCheckKind::kUnresolvedCheck:
   3866     case TypeCheckKind::kInterfaceCheck:
   3867       call_kind = LocationSummary::kCallOnSlowPath;
   3868       break;
   3869   }
   3870 
   3871   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   3872   if (baker_read_barrier_slow_path) {
   3873     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   3874   }
   3875   locations->SetInAt(0, Location::RequiresRegister());
   3876   locations->SetInAt(1, Location::RequiresRegister());
   3877   // The "out" register is used as a temporary, so it overlaps with the inputs.
   3878   // Note that TypeCheckSlowPathARM64 uses this register too.
   3879   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   3880   // Add temps if necessary for read barriers.
   3881   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
   3882 }
   3883 
   3884 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
   3885   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   3886   LocationSummary* locations = instruction->GetLocations();
   3887   Location obj_loc = locations->InAt(0);
   3888   Register obj = InputRegisterAt(instruction, 0);
   3889   Register cls = InputRegisterAt(instruction, 1);
   3890   Location out_loc = locations->Out();
   3891   Register out = OutputRegister(instruction);
   3892   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
   3893   DCHECK_LE(num_temps, 1u);
   3894   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   3895   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   3896   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   3897   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   3898   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   3899 
   3900   vixl::aarch64::Label done, zero;
   3901   SlowPathCodeARM64* slow_path = nullptr;
   3902 
   3903   // Return 0 if `obj` is null.
   3904   // Avoid null check if we know `obj` is not null.
   3905   if (instruction->MustDoNullCheck()) {
   3906     __ Cbz(obj, &zero);
   3907   }
   3908 
   3909   switch (type_check_kind) {
   3910     case TypeCheckKind::kExactCheck: {
   3911       // /* HeapReference<Class> */ out = obj->klass_
   3912       GenerateReferenceLoadTwoRegisters(instruction,
   3913                                         out_loc,
   3914                                         obj_loc,
   3915                                         class_offset,
   3916                                         maybe_temp_loc,
   3917                                         kCompilerReadBarrierOption);
   3918       __ Cmp(out, cls);
   3919       __ Cset(out, eq);
   3920       if (zero.IsLinked()) {
   3921         __ B(&done);
   3922       }
   3923       break;
   3924     }
   3925 
   3926     case TypeCheckKind::kAbstractClassCheck: {
   3927       // /* HeapReference<Class> */ out = obj->klass_
   3928       GenerateReferenceLoadTwoRegisters(instruction,
   3929                                         out_loc,
   3930                                         obj_loc,
   3931                                         class_offset,
   3932                                         maybe_temp_loc,
   3933                                         kCompilerReadBarrierOption);
   3934       // If the class is abstract, we eagerly fetch the super class of the
   3935       // object to avoid doing a comparison we know will fail.
   3936       vixl::aarch64::Label loop, success;
   3937       __ Bind(&loop);
   3938       // /* HeapReference<Class> */ out = out->super_class_
   3939       GenerateReferenceLoadOneRegister(instruction,
   3940                                        out_loc,
   3941                                        super_offset,
   3942                                        maybe_temp_loc,
   3943                                        kCompilerReadBarrierOption);
   3944       // If `out` is null, we use it for the result, and jump to `done`.
   3945       __ Cbz(out, &done);
   3946       __ Cmp(out, cls);
   3947       __ B(ne, &loop);
   3948       __ Mov(out, 1);
   3949       if (zero.IsLinked()) {
   3950         __ B(&done);
   3951       }
   3952       break;
   3953     }
   3954 
   3955     case TypeCheckKind::kClassHierarchyCheck: {
   3956       // /* HeapReference<Class> */ out = obj->klass_
   3957       GenerateReferenceLoadTwoRegisters(instruction,
   3958                                         out_loc,
   3959                                         obj_loc,
   3960                                         class_offset,
   3961                                         maybe_temp_loc,
   3962                                         kCompilerReadBarrierOption);
   3963       // Walk over the class hierarchy to find a match.
   3964       vixl::aarch64::Label loop, success;
   3965       __ Bind(&loop);
   3966       __ Cmp(out, cls);
   3967       __ B(eq, &success);
   3968       // /* HeapReference<Class> */ out = out->super_class_
   3969       GenerateReferenceLoadOneRegister(instruction,
   3970                                        out_loc,
   3971                                        super_offset,
   3972                                        maybe_temp_loc,
   3973                                        kCompilerReadBarrierOption);
   3974       __ Cbnz(out, &loop);
   3975       // If `out` is null, we use it for the result, and jump to `done`.
   3976       __ B(&done);
   3977       __ Bind(&success);
   3978       __ Mov(out, 1);
   3979       if (zero.IsLinked()) {
   3980         __ B(&done);
   3981       }
   3982       break;
   3983     }
   3984 
   3985     case TypeCheckKind::kArrayObjectCheck: {
   3986       // /* HeapReference<Class> */ out = obj->klass_
   3987       GenerateReferenceLoadTwoRegisters(instruction,
   3988                                         out_loc,
   3989                                         obj_loc,
   3990                                         class_offset,
   3991                                         maybe_temp_loc,
   3992                                         kCompilerReadBarrierOption);
   3993       // Do an exact check.
   3994       vixl::aarch64::Label exact_check;
   3995       __ Cmp(out, cls);
   3996       __ B(eq, &exact_check);
   3997       // Otherwise, we need to check that the object's class is a non-primitive array.
   3998       // /* HeapReference<Class> */ out = out->component_type_
   3999       GenerateReferenceLoadOneRegister(instruction,
   4000                                        out_loc,
   4001                                        component_offset,
   4002                                        maybe_temp_loc,
   4003                                        kCompilerReadBarrierOption);
   4004       // If `out` is null, we use it for the result, and jump to `done`.
   4005       __ Cbz(out, &done);
   4006       __ Ldrh(out, HeapOperand(out, primitive_offset));
   4007       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   4008       __ Cbnz(out, &zero);
   4009       __ Bind(&exact_check);
   4010       __ Mov(out, 1);
   4011       __ B(&done);
   4012       break;
   4013     }
   4014 
   4015     case TypeCheckKind::kArrayCheck: {
   4016       // No read barrier since the slow path will retry upon failure.
   4017       // /* HeapReference<Class> */ out = obj->klass_
   4018       GenerateReferenceLoadTwoRegisters(instruction,
   4019                                         out_loc,
   4020                                         obj_loc,
   4021                                         class_offset,
   4022                                         maybe_temp_loc,
   4023                                         kWithoutReadBarrier);
   4024       __ Cmp(out, cls);
   4025       DCHECK(locations->OnlyCallsOnSlowPath());
   4026       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
   4027                                                                       /* is_fatal */ false);
   4028       codegen_->AddSlowPath(slow_path);
   4029       __ B(ne, slow_path->GetEntryLabel());
   4030       __ Mov(out, 1);
   4031       if (zero.IsLinked()) {
   4032         __ B(&done);
   4033       }
   4034       break;
   4035     }
   4036 
   4037     case TypeCheckKind::kUnresolvedCheck:
   4038     case TypeCheckKind::kInterfaceCheck: {
   4039       // Note that we indeed only call on slow path, but we always go
   4040       // into the slow path for the unresolved and interface check
   4041       // cases.
   4042       //
   4043       // We cannot directly call the InstanceofNonTrivial runtime
   4044       // entry point without resorting to a type checking slow path
    4045       // here (i.e. by calling InvokeRuntime directly), as it would
    4046       // require assigning fixed registers for the inputs of this
   4047       // HInstanceOf instruction (following the runtime calling
   4048       // convention), which might be cluttered by the potential first
   4049       // read barrier emission at the beginning of this method.
   4050       //
   4051       // TODO: Introduce a new runtime entry point taking the object
   4052       // to test (instead of its class) as argument, and let it deal
   4053       // with the read barrier issues. This will let us refactor this
   4054       // case of the `switch` code as it was previously (with a direct
   4055       // call to the runtime not using a type checking slow path).
   4056       // This should also be beneficial for the other cases above.
   4057       DCHECK(locations->OnlyCallsOnSlowPath());
   4058       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
   4059                                                                       /* is_fatal */ false);
   4060       codegen_->AddSlowPath(slow_path);
   4061       __ B(slow_path->GetEntryLabel());
   4062       if (zero.IsLinked()) {
   4063         __ B(&done);
   4064       }
   4065       break;
   4066     }
   4067   }
   4068 
   4069   if (zero.IsLinked()) {
   4070     __ Bind(&zero);
   4071     __ Mov(out, 0);
   4072   }
   4073 
   4074   if (done.IsLinked()) {
   4075     __ Bind(&done);
   4076   }
   4077 
   4078   if (slow_path != nullptr) {
   4079     __ Bind(slow_path->GetExitLabel());
   4080   }
   4081 }
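
         // For illustration: the kExactCheck fast path above reduces `obj instanceof C` to
         // approximately the following (read barrier and reference unpoisoning omitted):
         //   cbz  w_obj, zero                    // only when a null check is required
         //   ldr  w_out, [x_obj, #class_offset]
         //   cmp  w_out, w_cls
         //   cset w_out, eq                      // out = (obj->klass_ == cls) ? 1 : 0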
   4082 
   4083 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
   4084   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   4085   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
   4086 
   4087   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   4088   switch (type_check_kind) {
   4089     case TypeCheckKind::kExactCheck:
   4090     case TypeCheckKind::kAbstractClassCheck:
   4091     case TypeCheckKind::kClassHierarchyCheck:
   4092     case TypeCheckKind::kArrayObjectCheck:
   4093       call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
   4094           LocationSummary::kCallOnSlowPath :
   4095           LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
   4096       break;
   4097     case TypeCheckKind::kArrayCheck:
   4098     case TypeCheckKind::kUnresolvedCheck:
   4099     case TypeCheckKind::kInterfaceCheck:
   4100       call_kind = LocationSummary::kCallOnSlowPath;
   4101       break;
   4102   }
   4103 
   4104   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   4105   locations->SetInAt(0, Location::RequiresRegister());
   4106   locations->SetInAt(1, Location::RequiresRegister());
   4107   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
   4108   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
   4109 }
   4110 
   4111 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
   4112   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   4113   LocationSummary* locations = instruction->GetLocations();
   4114   Location obj_loc = locations->InAt(0);
   4115   Register obj = InputRegisterAt(instruction, 0);
   4116   Register cls = InputRegisterAt(instruction, 1);
   4117   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
   4118   DCHECK_GE(num_temps, 1u);
   4119   DCHECK_LE(num_temps, 3u);
   4120   Location temp_loc = locations->GetTemp(0);
   4121   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
   4122   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
   4123   Register temp = WRegisterFrom(temp_loc);
   4124   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   4125   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   4126   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   4127   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   4128   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
   4129   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
   4130   const uint32_t object_array_data_offset =
   4131       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
   4132 
   4133   bool is_type_check_slow_path_fatal = false;
    4134   // Always false when read barriers are enabled: the checks below avoid some read barriers
    4135   // for performance and code size reasons, which can produce false negatives; those cases
    4136   // must be able to reach the runtime entrypoint, so the slow path cannot be fatal.
   4137   if (!kEmitCompilerReadBarrier) {
   4138     is_type_check_slow_path_fatal =
   4139         (type_check_kind == TypeCheckKind::kExactCheck ||
   4140          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   4141          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   4142          type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
   4143         !instruction->CanThrowIntoCatchBlock();
   4144   }
   4145   SlowPathCodeARM64* type_check_slow_path =
   4146       new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
   4147                                                           is_type_check_slow_path_fatal);
   4148   codegen_->AddSlowPath(type_check_slow_path);
   4149 
   4150   vixl::aarch64::Label done;
   4151   // Avoid null check if we know obj is not null.
   4152   if (instruction->MustDoNullCheck()) {
   4153     __ Cbz(obj, &done);
   4154   }
   4155 
   4156   switch (type_check_kind) {
   4157     case TypeCheckKind::kExactCheck:
   4158     case TypeCheckKind::kArrayCheck: {
   4159       // /* HeapReference<Class> */ temp = obj->klass_
   4160       GenerateReferenceLoadTwoRegisters(instruction,
   4161                                         temp_loc,
   4162                                         obj_loc,
   4163                                         class_offset,
   4164                                         maybe_temp2_loc,
   4165                                         kWithoutReadBarrier);
   4166 
   4167       __ Cmp(temp, cls);
   4168       // Jump to slow path for throwing the exception or doing a
   4169       // more involved array check.
   4170       __ B(ne, type_check_slow_path->GetEntryLabel());
   4171       break;
   4172     }
   4173 
   4174     case TypeCheckKind::kAbstractClassCheck: {
   4175       // /* HeapReference<Class> */ temp = obj->klass_
   4176       GenerateReferenceLoadTwoRegisters(instruction,
   4177                                         temp_loc,
   4178                                         obj_loc,
   4179                                         class_offset,
   4180                                         maybe_temp2_loc,
   4181                                         kWithoutReadBarrier);
   4182 
   4183       // If the class is abstract, we eagerly fetch the super class of the
   4184       // object to avoid doing a comparison we know will fail.
   4185       vixl::aarch64::Label loop;
   4186       __ Bind(&loop);
   4187       // /* HeapReference<Class> */ temp = temp->super_class_
   4188       GenerateReferenceLoadOneRegister(instruction,
   4189                                        temp_loc,
   4190                                        super_offset,
   4191                                        maybe_temp2_loc,
   4192                                        kWithoutReadBarrier);
   4193 
   4194       // If the class reference currently in `temp` is null, jump to the slow path to throw the
   4195       // exception.
   4196       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
   4197       // Otherwise, compare classes.
   4198       __ Cmp(temp, cls);
   4199       __ B(ne, &loop);
   4200       break;
   4201     }
   4202 
   4203     case TypeCheckKind::kClassHierarchyCheck: {
   4204       // /* HeapReference<Class> */ temp = obj->klass_
   4205       GenerateReferenceLoadTwoRegisters(instruction,
   4206                                         temp_loc,
   4207                                         obj_loc,
   4208                                         class_offset,
   4209                                         maybe_temp2_loc,
   4210                                         kWithoutReadBarrier);
   4211 
   4212       // Walk over the class hierarchy to find a match.
   4213       vixl::aarch64::Label loop;
   4214       __ Bind(&loop);
   4215       __ Cmp(temp, cls);
   4216       __ B(eq, &done);
   4217 
   4218       // /* HeapReference<Class> */ temp = temp->super_class_
   4219       GenerateReferenceLoadOneRegister(instruction,
   4220                                        temp_loc,
   4221                                        super_offset,
   4222                                        maybe_temp2_loc,
   4223                                        kWithoutReadBarrier);
   4224 
   4225       // If the class reference currently in `temp` is not null, jump
   4226       // back at the beginning of the loop.
   4227       __ Cbnz(temp, &loop);
   4228       // Otherwise, jump to the slow path to throw the exception.
   4229       __ B(type_check_slow_path->GetEntryLabel());
   4230       break;
   4231     }
   4232 
   4233     case TypeCheckKind::kArrayObjectCheck: {
   4234       // /* HeapReference<Class> */ temp = obj->klass_
   4235       GenerateReferenceLoadTwoRegisters(instruction,
   4236                                         temp_loc,
   4237                                         obj_loc,
   4238                                         class_offset,
   4239                                         maybe_temp2_loc,
   4240                                         kWithoutReadBarrier);
   4241 
   4242       // Do an exact check.
   4243       __ Cmp(temp, cls);
   4244       __ B(eq, &done);
   4245 
   4246       // Otherwise, we need to check that the object's class is a non-primitive array.
   4247       // /* HeapReference<Class> */ temp = temp->component_type_
   4248       GenerateReferenceLoadOneRegister(instruction,
   4249                                        temp_loc,
   4250                                        component_offset,
   4251                                        maybe_temp2_loc,
   4252                                        kWithoutReadBarrier);
   4253 
   4254       // If the component type is null, jump to the slow path to throw the exception.
   4255       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
   4256       // Otherwise, the object is indeed an array. Further check that this component type is not a
   4257       // primitive type.
   4258       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
   4259       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
   4260       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
   4261       break;
   4262     }
   4263 
   4264     case TypeCheckKind::kUnresolvedCheck:
   4265       // We always go into the type check slow path for the unresolved check cases.
   4266       //
   4267       // We cannot directly call the CheckCast runtime entry point
   4268       // without resorting to a type checking slow path here (i.e. by
    4269       // calling InvokeRuntime directly), as it would require
    4270       // assigning fixed registers for the inputs of this HCheckCast
    4271       // instruction (following the runtime calling convention), which
   4272       // might be cluttered by the potential first read barrier
   4273       // emission at the beginning of this method.
   4274       __ B(type_check_slow_path->GetEntryLabel());
   4275       break;
   4276     case TypeCheckKind::kInterfaceCheck: {
   4277       // /* HeapReference<Class> */ temp = obj->klass_
   4278       GenerateReferenceLoadTwoRegisters(instruction,
   4279                                         temp_loc,
   4280                                         obj_loc,
   4281                                         class_offset,
   4282                                         maybe_temp2_loc,
   4283                                         kWithoutReadBarrier);
   4284 
   4285       // /* HeapReference<Class> */ temp = temp->iftable_
   4286       GenerateReferenceLoadTwoRegisters(instruction,
   4287                                         temp_loc,
   4288                                         temp_loc,
   4289                                         iftable_offset,
   4290                                         maybe_temp2_loc,
   4291                                         kWithoutReadBarrier);
   4292       // Iftable is never null.
   4293       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
   4294       // Loop through the iftable and check if any class matches.
   4295       vixl::aarch64::Label start_loop;
   4296       __ Bind(&start_loop);
   4297       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
   4298       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
   4299       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
   4300       // Go to next interface.
   4301       __ Add(temp, temp, 2 * kHeapReferenceSize);
   4302       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
   4303       // Compare the classes and continue the loop if they do not match.
   4304       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
   4305       __ B(ne, &start_loop);
   4306       break;
   4307     }
   4308   }
   4309   __ Bind(&done);
   4310 
   4311   __ Bind(type_check_slow_path->GetExitLabel());
   4312 }
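
         // Note on the interface loop above: an IfTable stores (interface class, method array)
         // reference pairs back to back, so `temp` advances by 2 * kHeapReferenceSize per
         // interface and the length-derived counter in `maybe_temp2_loc` drops by 2 each
         // iteration, until either a class matches or the counter reaches zero (slow path).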
   4313 
   4314 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
   4315   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   4316   locations->SetOut(Location::ConstantLocation(constant));
   4317 }
   4318 
   4319 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   4320   // Will be generated at use site.
   4321 }
   4322 
   4323 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
   4324   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   4325   locations->SetOut(Location::ConstantLocation(constant));
   4326 }
   4327 
   4328 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   4329   // Will be generated at use site.
   4330 }
   4331 
   4332 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   4333   // The trampoline uses the same calling convention as dex calling conventions,
   4334   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
   4335   // the method_idx.
   4336   HandleInvoke(invoke);
   4337 }
   4338 
   4339 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   4340   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
   4341 }
   4342 
   4343 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
   4344   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
   4345   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
   4346 }
   4347 
   4348 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
   4349   HandleInvoke(invoke);
   4350 }
   4351 
   4352 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
   4353   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   4354   LocationSummary* locations = invoke->GetLocations();
   4355   Register temp = XRegisterFrom(locations->GetTemp(0));
   4356   Location receiver = locations->InAt(0);
   4357   Offset class_offset = mirror::Object::ClassOffset();
   4358   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
   4359 
   4360   // The register ip1 is required to be used for the hidden argument in
   4361   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
   4362   MacroAssembler* masm = GetVIXLAssembler();
   4363   UseScratchRegisterScope scratch_scope(masm);
   4364   scratch_scope.Exclude(ip1);
   4365   __ Mov(ip1, invoke->GetDexMethodIndex());
   4366 
   4367   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
   4368   if (receiver.IsStackSlot()) {
   4369     __ Ldr(temp.W(), StackOperandFrom(receiver));
   4370     {
   4371       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   4372       // /* HeapReference<Class> */ temp = temp->klass_
   4373       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
   4374       codegen_->MaybeRecordImplicitNullCheck(invoke);
   4375     }
   4376   } else {
   4377     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   4378     // /* HeapReference<Class> */ temp = receiver->klass_
   4379     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   4380     codegen_->MaybeRecordImplicitNullCheck(invoke);
   4381   }
   4382 
   4383   // Instead of simply (possibly) unpoisoning `temp` here, we should
   4384   // emit a read barrier for the previous class reference load.
   4385   // However this is not required in practice, as this is an
   4386   // intermediate/temporary reference and because the current
   4387   // concurrent copying collector keeps the from-space memory
   4388   // intact/accessible until the end of the marking phase (the
   4389   // concurrent copying collector may not in the future).
   4390   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   4391   __ Ldr(temp,
   4392       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
   4393   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   4394       invoke->GetImtIndex(), kArm64PointerSize));
   4395   // temp = temp->GetImtEntryAt(method_offset);
   4396   __ Ldr(temp, MemOperand(temp, method_offset));
   4397   // lr = temp->GetEntryPoint();
   4398   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
   4399 
   4400   {
   4401     // Ensure the pc position is recorded immediately after the `blr` instruction.
   4402     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
   4403 
   4404     // lr();
   4405     __ blr(lr);
   4406     DCHECK(!codegen_->IsLeafMethod());
   4407     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   4408   }
   4409 }
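
         // For illustration, the interface dispatch emitted above boils down to:
         //   mov  ip1, #dex_method_index               // hidden argument for the conflict trampoline
         //   ldr  w_temp, [x_receiver, #class_offset]
         //   ldr  x_temp, [x_temp, #imt_ptr_offset]
         //   ldr  x_temp, [x_temp, #imt_entry_offset]  // ArtMethod* (or conflict stub)
         //   ldr  lr, [x_temp, #entry_point_offset]
         //   blr  lr
         // with the class load and its implicit null check kept pool-free by the
         // EmissionCheckScope so that the recorded pc stays accurate.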
   4410 
   4411 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   4412   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   4413   if (intrinsic.TryDispatch(invoke)) {
   4414     return;
   4415   }
   4416 
   4417   HandleInvoke(invoke);
   4418 }
   4419 
   4420 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   4421   // Explicit clinit checks triggered by static invokes must have been pruned by
   4422   // art::PrepareForRegisterAllocation.
   4423   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   4424 
   4425   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   4426   if (intrinsic.TryDispatch(invoke)) {
   4427     return;
   4428   }
   4429 
   4430   HandleInvoke(invoke);
   4431 }
   4432 
   4433 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
   4434   if (invoke->GetLocations()->Intrinsified()) {
   4435     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
   4436     intrinsic.Dispatch(invoke);
   4437     return true;
   4438   }
   4439   return false;
   4440 }
   4441 
   4442 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
   4443       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
   4444       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
   4445   // On ARM64 we support all dispatch types.
   4446   return desired_dispatch_info;
   4447 }
   4448 
   4449 Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
   4450                                                                     Location temp) {
   4451   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
   4452   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   4453   switch (invoke->GetMethodLoadKind()) {
   4454     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
   4455       uint32_t offset =
   4456           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
   4457       // temp = thread->string_init_entrypoint
   4458       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
   4459       break;
   4460     }
   4461     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
   4462       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
   4463       break;
   4464     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
   4465       // Load method address from literal pool.
   4466       __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
   4467       break;
   4468     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
   4469       // Add ADRP with its PC-relative DexCache access patch.
   4470       const DexFile& dex_file = invoke->GetDexFileForPcRelativeDexCache();
   4471       uint32_t element_offset = invoke->GetDexCacheArrayOffset();
   4472       vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
   4473       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
   4474       // Add LDR with its PC-relative DexCache access patch.
   4475       vixl::aarch64::Label* ldr_label =
   4476           NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
   4477       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
   4478       break;
   4479     }
   4480     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
   4481       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
   4482       Register reg = XRegisterFrom(temp);
   4483       Register method_reg;
   4484       if (current_method.IsRegister()) {
   4485         method_reg = XRegisterFrom(current_method);
   4486       } else {
   4487         DCHECK(invoke->GetLocations()->Intrinsified());
   4488         DCHECK(!current_method.IsValid());
   4489         method_reg = reg;
   4490         __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
   4491       }
   4492 
   4493       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
   4494       __ Ldr(reg.X(),
   4495              MemOperand(method_reg.X(),
   4496                         ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
   4497       // temp = temp[index_in_cache];
   4498       // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
   4499       uint32_t index_in_cache = invoke->GetDexMethodIndex();
    4500       __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
   4501       break;
   4502     }
   4503   }
   4504   return callee_method;
   4505 }
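
         // For illustration, the kDexCachePcRelative case above emits a linker-patched pair:
         //   adrp x_temp, <dex_cache_array_page>      // patched via NewPcRelativeDexCacheArrayPatch
         //   ldr  x_temp, [x_temp, #<page_offset>]    // resolved ArtMethod* from the dex cache array
         // The shared adrp_label ties the ADRP and the LDR to the same patch, letting the
         // linker fill in the final page address and page offset.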
   4506 
   4507 void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   4508   // All registers are assumed to be correctly set up.
   4509   Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
   4510 
   4511   switch (invoke->GetCodePtrLocation()) {
   4512     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
   4513       __ Bl(&frame_entry_label_);
   4514       break;
   4515     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
   4516       // LR = callee_method->entry_point_from_quick_compiled_code_;
   4517       __ Ldr(lr, MemOperand(
   4518           XRegisterFrom(callee_method),
   4519           ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
   4520       {
    4521         // To ensure that the pc position is recorded immediately after the `blr`
    4522         // instruction, BLR must be the last instruction emitted in this function.
   4523         // Recording the pc will occur right after returning from this function.
   4524         ExactAssemblyScope eas(GetVIXLAssembler(),
   4525                                kInstructionSize,
   4526                                CodeBufferCheckScope::kExactSize);
   4527         // lr()
   4528         __ blr(lr);
   4529       }
   4530       break;
   4531   }
   4532 
   4533   DCHECK(!IsLeafMethod());
   4534 }
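
// Illustrative sketch only (not emitted verbatim): for
// CodePtrLocation::kCallArtMethod the code above boils down to roughly
//   ldr lr, [<callee_method>, #entry_point_offset]  // load quick entry point
//   blr lr                                          // call
// The ExactAssemblyScope pins the BLR as the last instruction emitted here, so
// the caller's RecordPcInfo() records the pc immediately after the BLR.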

void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);
  Register temp = XRegisterFrom(temp_in);
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
  Offset class_offset = mirror::Object::ClassOffset();
  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);

  DCHECK(receiver.IsRegister());

  {
    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
    // /* HeapReference<Class> */ temp = receiver->klass_
    __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
    MaybeRecordImplicitNullCheck(invoke);
  }
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
  // temp = temp->GetMethodAt(method_offset);
  __ Ldr(temp, MemOperand(temp, method_offset));
  // lr = temp->GetEntryPoint();
  __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
  {
    // To ensure that the pc position is recorded immediately after the `blr` instruction,
    // BLR should be the last instruction emitted in this function.
    // Recording the pc will occur right after returning from this function.
    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
    // lr();
    __ blr(lr);
  }
}
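
// Illustrative sketch only: the virtual dispatch above comes out as roughly
//   ldr wT, [<receiver>, #class_offset]  // load receiver->klass_ (may fault)
//   ldr xT, [xT, #vtable_entry_offset]   // load the ArtMethod* from the vtable
//   ldr lr, [xT, #entry_point_offset]    // load its quick entry point
//   blr lr
// Register names and offsets are placeholders for the example.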

void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
  HandleInvoke(invoke);
}

void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
  codegen_->GenerateInvokePolymorphicCall(invoke);
}

vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
    const DexFile& dex_file,
    dex::StringIndex string_index,
    vixl::aarch64::Label* adrp_label) {
  return
      NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
}

vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
    const DexFile& dex_file,
    dex::TypeIndex type_index,
    vixl::aarch64::Label* adrp_label) {
  return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_);
}

vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
    const DexFile& dex_file,
    dex::TypeIndex type_index,
    vixl::aarch64::Label* adrp_label) {
  return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
}

vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
    const DexFile& dex_file,
    uint32_t element_offset,
    vixl::aarch64::Label* adrp_label) {
  return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
}

vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
  baker_read_barrier_patches_.emplace_back(custom_data);
  return &baker_read_barrier_patches_.back().label;
}

vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
    const DexFile& dex_file,
    uint32_t offset_or_index,
    vixl::aarch64::Label* adrp_label,
    ArenaDeque<PcRelativePatchInfo>* patches) {
  // Add a patch entry and return the label.
  patches->emplace_back(dex_file, offset_or_index);
  PcRelativePatchInfo* info = &patches->back();
  vixl::aarch64::Label* label = &info->label;
  // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
  info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
  return label;
}

vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
    const DexFile& dex_file, dex::StringIndex string_index) {
  return boot_image_string_patches_.GetOrCreate(
      StringReference(&dex_file, string_index),
      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
}

vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
    const DexFile& dex_file, dex::TypeIndex type_index) {
  return boot_image_type_patches_.GetOrCreate(
      TypeReference(&dex_file, type_index),
      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
}

vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
    uint64_t address) {
  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}

vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
    const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
  jit_string_roots_.Overwrite(StringReference(&dex_file, string_index),
                              reinterpret_cast64<uint64_t>(handle.GetReference()));
  return jit_string_patches_.GetOrCreate(
      StringReference(&dex_file, string_index),
      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
}

vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
    const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
  jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index),
                             reinterpret_cast64<uint64_t>(handle.GetReference()));
  return jit_class_patches_.GetOrCreate(
      TypeReference(&dex_file, type_index),
      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
}

void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
                                             vixl::aarch64::Register reg) {
  DCHECK(reg.IsX());
  SingleEmissionCheckScope guard(GetVIXLAssembler());
  __ Bind(fixup_label);
  __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
}

void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
                                            vixl::aarch64::Register out,
                                            vixl::aarch64::Register base) {
  DCHECK(out.IsX());
  DCHECK(base.IsX());
  SingleEmissionCheckScope guard(GetVIXLAssembler());
  __ Bind(fixup_label);
  __ add(out, base, Operand(/* offset placeholder */ 0));
}

void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
                                                  vixl::aarch64::Register out,
                                                  vixl::aarch64::Register base) {
  DCHECK(base.IsX());
  SingleEmissionCheckScope guard(GetVIXLAssembler());
  __ Bind(fixup_label);
  __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
}
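
// Illustrative sketch only: a PC-relative access built from the placeholders
// above is an ADRP/ADD pair (to materialize an address) or an ADRP/LDR pair
// (to load from a .bss or DexCache slot). After the linker fills in the
// placeholder offsets, it looks roughly like
//   adrp x0, #+0x12000       // page containing the target
//   ldr  w0, [x0, #0x348]    // low 12 bits of the target's page offset
// Both labels of a pair share pc_insn_label so the linker can compute the
// ADRP-relative delta; the concrete offsets above are made up.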

template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
    const ArenaDeque<PcRelativePatchInfo>& infos,
    ArenaVector<LinkerPatch>* linker_patches) {
  for (const PcRelativePatchInfo& info : infos) {
    linker_patches->push_back(Factory(info.label.GetLocation(),
                                      &info.target_dex_file,
                                      info.pc_insn_label->GetLocation(),
                                      info.offset_or_index));
  }
}

void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      pc_relative_dex_cache_patches_.size() +
      boot_image_string_patches_.size() +
      pc_relative_string_patches_.size() +
      boot_image_type_patches_.size() +
      pc_relative_type_patches_.size() +
      type_bss_entry_patches_.size() +
      baker_read_barrier_patches_.size();
  linker_patches->reserve(size);
  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
                                                              &info.target_dex_file,
                                                              info.pc_insn_label->GetLocation(),
                                                              info.offset_or_index));
  }
  for (const auto& entry : boot_image_string_patches_) {
    const StringReference& target_string = entry.first;
    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
    linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
                                                       target_string.dex_file,
                                                       target_string.string_index.index_));
  }
  if (!GetCompilerOptions().IsBootImage()) {
    DCHECK(pc_relative_type_patches_.empty());
    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                  linker_patches);
  } else {
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                linker_patches);
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                  linker_patches);
  }
  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                              linker_patches);
  for (const auto& entry : boot_image_type_patches_) {
    const TypeReference& target_type = entry.first;
    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
    linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
                                                     target_type.dex_file,
                                                     target_type.type_index.index_));
  }
  for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
    linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
                                                                       info.custom_data));
  }
  DCHECK_EQ(size, linker_patches->size());
}

vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(
    uint32_t value, Uint32ToLiteralMap* map) {
  return map->GetOrCreate(
      value,
      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
}

vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
  return uint64_literals_.GetOrCreate(
      value,
      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
}
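
// Illustrative note: the Deduplicate* helpers guarantee one literal-pool entry
// per distinct key, so e.g. two kDirectAddress invokes of the same method share
// a single 64-bit literal. A minimal sketch of the idea behind GetOrCreate
// (NewLiteral stands in for the VIXL literal creation used above):
//   auto it = map->find(value);
//   if (it != map->end()) return it->second;  // reuse the existing literal
//   return map->emplace(value, NewLiteral(value)).first->second;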

vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
    MethodReference target_method,
    MethodToLiteralMap* map) {
  return map->GetOrCreate(
      target_method,
      [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
}

void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
  // Explicit clinit checks triggered by static invokes must have been pruned by
  // art::PrepareForRegisterAllocation.
  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());

  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
    return;
  }

  // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
  // are no pools emitted.
  EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
  LocationSummary* locations = invoke->GetLocations();
  codegen_->GenerateStaticOrDirectCall(
      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
    return;
  }

  // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
  // are no pools emitted.
  EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
  DCHECK(!codegen_->IsLeafMethod());
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
    HLoadClass::LoadKind desired_class_load_kind) {
  switch (desired_class_load_kind) {
    case HLoadClass::LoadKind::kInvalid:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
    case HLoadClass::LoadKind::kReferrersClass:
      break;
    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
      DCHECK(!GetCompilerOptions().GetCompilePic());
      break;
    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().GetCompilePic());
      break;
    case HLoadClass::LoadKind::kBootImageAddress:
      break;
    case HLoadClass::LoadKind::kBssEntry:
      DCHECK(!Runtime::Current()->UseJitCompilation());
      break;
    case HLoadClass::LoadKind::kJitTableAddress:
      DCHECK(Runtime::Current()->UseJitCompilation());
      break;
    case HLoadClass::LoadKind::kDexCacheViaMethod:
      break;
  }
  return desired_class_load_kind;
}

void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
    InvokeRuntimeCallingConvention calling_convention;
    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
        cls,
        LocationFrom(calling_convention.GetRegisterAt(0)),
        LocationFrom(vixl::aarch64::x0));
    DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
    return;
  }
  DCHECK(!cls->NeedsAccessCheck());

  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
      ? LocationSummary::kCallOnSlowPath
      : LocationSummary::kNoCall;
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
  if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }

  if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
    locations->SetInAt(0, Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister());
  if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
    if (!kUseReadBarrier || kUseBakerReadBarrier) {
      // Rely on the type resolution or initialization and marking to save everything we need.
      locations->AddTemp(FixedTempLocation());
      RegisterSet caller_saves = RegisterSet::Empty();
      InvokeRuntimeCallingConvention calling_convention;
      caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
      DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
                RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
                             Primitive::kPrimNot).GetCode());
      locations->SetCustomSlowPathCallerSaves(caller_saves);
    } else {
      // For non-Baker read barrier we have a temp-clobbering call.
    }
  }
}

// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
    codegen_->GenerateLoadClassRuntimeCall(cls);
    return;
  }
  DCHECK(!cls->NeedsAccessCheck());

  Location out_loc = cls->GetLocations()->Out();
  Register out = OutputRegister(cls);
  Register bss_entry_temp;
  vixl::aarch64::Label* bss_entry_adrp_label = nullptr;

  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
      ? kWithoutReadBarrier
      : kCompilerReadBarrierOption;
  bool generate_null_check = false;
  switch (load_kind) {
    case HLoadClass::LoadKind::kReferrersClass: {
      DCHECK(!cls->CanCallRuntime());
      DCHECK(!cls->MustGenerateClinitCheck());
      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
      Register current_method = InputRegisterAt(cls, 0);
      GenerateGcRootFieldLoad(cls,
                              out_loc,
                              current_method,
                              ArtMethod::DeclaringClassOffset().Int32Value(),
                              /* fixup_label */ nullptr,
                              read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                            cls->GetTypeIndex()));
      break;
    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      // Add ADRP with its PC-relative type patch.
      const DexFile& dex_file = cls->GetDexFile();
      dex::TypeIndex type_index = cls->GetTypeIndex();
      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
      // Add ADD with its PC-relative type patch.
      vixl::aarch64::Label* add_label =
          codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
      codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
      break;
    }
    case HLoadClass::LoadKind::kBootImageAddress: {
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      uint32_t address = dchecked_integral_cast<uint32_t>(
          reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
      DCHECK_NE(address, 0u);
      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
      break;
    }
    case HLoadClass::LoadKind::kBssEntry: {
      // Add ADRP with its PC-relative Class .bss entry patch.
      const DexFile& dex_file = cls->GetDexFile();
      dex::TypeIndex type_index = cls->GetTypeIndex();
      bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0));
      bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
      codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp);
      // Add LDR with its PC-relative Class patch.
      vixl::aarch64::Label* ldr_label =
          codegen_->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label);
      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
      GenerateGcRootFieldLoad(cls,
                              out_loc,
                              bss_entry_temp,
                              /* offset placeholder */ 0u,
                              ldr_label,
                              read_barrier_option);
      generate_null_check = true;
      break;
    }
    case HLoadClass::LoadKind::kJitTableAddress: {
      __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                       cls->GetTypeIndex(),
                                                       cls->GetClass()));
      GenerateGcRootFieldLoad(cls,
                              out_loc,
                              out.X(),
                              /* offset */ 0,
                              /* fixup_label */ nullptr,
                              read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kDexCacheViaMethod:
    case HLoadClass::LoadKind::kInvalid:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }

  bool do_clinit = cls->MustGenerateClinitCheck();
  if (generate_null_check || do_clinit) {
    DCHECK(cls->CanCallRuntime());
    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
        cls, cls, cls->GetDexPc(), do_clinit, bss_entry_temp, bss_entry_adrp_label);
    codegen_->AddSlowPath(slow_path);
    if (generate_null_check) {
      __ Cbz(out, slow_path->GetEntryLabel());
    }
    if (cls->MustGenerateClinitCheck()) {
      GenerateClassInitializationCheck(slow_path, out);
    } else {
      __ Bind(slow_path->GetExitLabel());
    }
  }
}
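
// Illustrative sketch only: the kBssEntry path above emits roughly
//   adrp xT, :bss_page:         // page of the class .bss slot (patched)
//   ldr  wOut, [xT, :lo12:...]  // load the GcRoot<Class> from the slot
//   cbz  wOut, <slow path>      // slot still null -> resolve at runtime
// The slow path reuses xT and the ADRP label to store the resolved class back
// into the .bss slot; register names and relocations here are placeholders.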

static MemOperand GetExceptionTlsAddress() {
  return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
}

void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
  __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
}
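
// Illustrative note: `tr` is ART's reserved thread register on ARM64, so the
// load above is a single instruction of the form
//   ldr wOut, [tr, #exception_offset]  // Thread::Current()->exception_
// and VisitClearException below stores wzr to the same thread-local slot.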

void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
  new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
}

void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
  __ Str(wzr, GetExceptionTlsAddress());
}

HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
    HLoadString::LoadKind desired_string_load_kind) {
  switch (desired_string_load_kind) {
    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
      DCHECK(!GetCompilerOptions().GetCompilePic());
      break;
    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().GetCompilePic());
      break;
    case HLoadString::LoadKind::kBootImageAddress:
      break;
    case HLoadString::LoadKind::kBssEntry:
      DCHECK(!Runtime::Current()->UseJitCompilation());
      break;
    case HLoadString::LoadKind::kJitTableAddress:
      DCHECK(Runtime::Current()->UseJitCompilation());
      break;
    case HLoadString::LoadKind::kDexCacheViaMethod:
      break;
  }
  return desired_string_load_kind;
}

void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
  LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
    InvokeRuntimeCallingConvention calling_convention;
    locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
  } else {
    locations->SetOut(Location::RequiresRegister());
    if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
      if (!kUseReadBarrier || kUseBakerReadBarrier) {
        // Rely on the pResolveString entrypoint and marking to save everything we need.
        locations->AddTemp(FixedTempLocation());
        RegisterSet caller_saves = RegisterSet::Empty();
        InvokeRuntimeCallingConvention calling_convention;
        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
        DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
                  RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
                               Primitive::kPrimNot).GetCode());
        locations->SetCustomSlowPathCallerSaves(caller_saves);
      } else {
        // For non-Baker read barrier we have a temp-clobbering call.
      }
    }
  }
}

// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
  Register out = OutputRegister(load);
  Location out_loc = load->GetLocations()->Out();

  switch (load->GetLoadKind()) {
    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
      __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
                                                              load->GetStringIndex()));
      return;  // No dex cache slow path.
    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
      // Add ADRP with its PC-relative String patch.
      const DexFile& dex_file = load->GetDexFile();
      const dex::StringIndex string_index = load->GetStringIndex();
      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
      // Add ADD with its PC-relative String patch.
      vixl::aarch64::Label* add_label =
          codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
      codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
      return;  // No dex cache slow path.
    }
    case HLoadString::LoadKind::kBootImageAddress: {
      uint32_t address = dchecked_integral_cast<uint32_t>(
          reinterpret_cast<uintptr_t>(load->GetString().Get()));
      DCHECK_NE(address, 0u);
      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
      return;  // No dex cache slow path.
    }
    case HLoadString::LoadKind::kBssEntry: {
      // Add ADRP with its PC-relative String .bss entry patch.
      const DexFile& dex_file = load->GetDexFile();
      const dex::StringIndex string_index = load->GetStringIndex();
      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
      Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
      codegen_->EmitAdrpPlaceholder(adrp_label, temp);
      // Add LDR with its PC-relative String patch.
      vixl::aarch64::Label* ldr_label =
          codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
      // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
      GenerateGcRootFieldLoad(load,
                              out_loc,
                              temp,
                              /* offset placeholder */ 0u,
                              ldr_label,
                              kCompilerReadBarrierOption);
      SlowPathCodeARM64* slow_path =
          new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
      codegen_->AddSlowPath(slow_path);
      __ Cbz(out.X(), slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
    case HLoadString::LoadKind::kJitTableAddress: {
      __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
                                                        load->GetStringIndex(),
                                                        load->GetString()));
      GenerateGcRootFieldLoad(load,
                              out_loc,
                              out.X(),
                              /* offset */ 0,
                              /* fixup_label */ nullptr,
                              kCompilerReadBarrierOption);
      return;
    }
    default:
      break;
  }

  // TODO: Re-add the compiler code to do string dex cache lookup.
  InvokeRuntimeCallingConvention calling_convention;
  DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
  __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
  codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
  CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
}
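
// Illustrative note: the fallback above is a plain runtime call; conceptually
//   w0 = string_index        // moved into the first argument register
//   call pResolveString      // resolved String comes back in w0
// which is why the DCHECK requires the output register to alias the first
// calling-convention register.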

void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
  locations->SetOut(Location::ConstantLocation(constant));
}

void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}

void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
}

void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
  codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
                          instruction,
                          instruction->GetDexPc());
  if (instruction->IsEnter()) {
    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
  } else {
    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
  }
}

void LocationsBuilderARM64::VisitMul(HMul* mul) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
  switch (mul->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RequiresRegister());
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;

    default:
      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
  }
}

void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
  switch (mul->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
      break;

    default:
      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
  }
}

void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
  switch (neg->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;

    default:
      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
  }
}

void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
  switch (neg->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
      break;

    default:
      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
  }
}

void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetOut(LocationFrom(x0));
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
}

void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
  // Note: if heap poisoning is enabled, the entry point takes care
  // of poisoning the reference.
  QuickEntrypointEnum entrypoint =
      CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
  codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
  CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
}

void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  if (instruction->IsStringAlloc()) {
    locations->AddTemp(LocationFrom(kArtMethodRegister));
  } else {
    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
  }
  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
}

void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
  // Note: if heap poisoning is enabled, the entry point takes care
  // of poisoning the reference.
  if (instruction->IsStringAlloc()) {
    // String is allocated through StringFactory. Call NewEmptyString entry point.
    Location temp = instruction->GetLocations()->GetTemp(0);
    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
    __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
    __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));

    {
      // Ensure the pc position is recorded immediately after the `blr` instruction.
      ExactAssemblyScope eas(GetVIXLAssembler(),
                             kInstructionSize,
                             CodeBufferCheckScope::kExactSize);
      __ blr(lr);
      codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
    }
  } else {
    codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
  }
}
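
// Illustrative sketch only: the String allocation path above loads the
// NewEmptyString ArtMethod* from the thread-local entrypoint table and calls
// its quick code, roughly
//   ldr xT, [tr, #pNewEmptyString_offset]  // StringFactory method
//   ldr lr, [xT, #entry_point_offset]      // its quick entry point
//   blr lr                                 // new String returned in w0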

void LocationsBuilderARM64::VisitNot(HNot* instruction) {
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
  switch (instruction->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
      break;

    default:
      LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
  }
}

void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
  __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
}

void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
}

void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
  if (CanMoveNullCheckToUser(instruction)) {
    return;
  }
  {
    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
    Location obj = instruction->GetLocations()->InAt(0);
    __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
    RecordPcInfo(instruction, instruction->GetDexPc());
  }
}
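
// Illustrative note: the implicit null check is a load to the zero register,
//   ldr wzr, [<obj>]  // faults on null; the loaded value is discarded
// A null object triggers SIGSEGV, which the runtime's fault handler maps back
// to a NullPointerException using the pc recorded right after the load.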
   5353 
   5354 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
   5355   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction);
   5356   AddSlowPath(slow_path);
   5357 
   5358   LocationSummary* locations = instruction->GetLocations();
   5359   Location obj = locations->InAt(0);
   5360 
   5361   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
   5362 }
   5363 
   5364 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
   5365   codegen_->GenerateNullCheck(instruction);
   5366 }
   5367 
   5368 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
   5369   HandleBinaryOp(instruction);
   5370 }
   5371 
   5372 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
   5373   HandleBinaryOp(instruction);
   5374 }
   5375 
   5376 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   5377   LOG(FATAL) << "Unreachable";
   5378 }
   5379 
   5380 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
   5381   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
   5382 }
   5383 
   5384 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
   5385   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   5386   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
   5387   if (location.IsStackSlot()) {
   5388     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   5389   } else if (location.IsDoubleStackSlot()) {
   5390     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   5391   }
   5392   locations->SetOut(location);
   5393 }
   5394 
   5395 void InstructionCodeGeneratorARM64::VisitParameterValue(
   5396     HParameterValue* instruction ATTRIBUTE_UNUSED) {
   5397   // Nothing to do, the parameter is already at its location.
   5398 }
   5399 
   5400 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
   5401   LocationSummary* locations =
   5402       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   5403   locations->SetOut(LocationFrom(kArtMethodRegister));
   5404 }
   5405 
   5406 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
   5407     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
   5408   // Nothing to do, the method is already at its location.
   5409 }
   5410 
   5411 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
   5412   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   5413   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
   5414     locations->SetInAt(i, Location::Any());
   5415   }
   5416   locations->SetOut(Location::Any());
   5417 }
   5418 
   5419 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   5420   LOG(FATAL) << "Unreachable";
   5421 }
   5422 
   5423 void LocationsBuilderARM64::VisitRem(HRem* rem) {
   5424   Primitive::Type type = rem->GetResultType();
   5425   LocationSummary::CallKind call_kind =
   5426       Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
   5427                                            : LocationSummary::kNoCall;
   5428   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
   5429 
   5430   switch (type) {
   5431     case Primitive::kPrimInt:
   5432     case Primitive::kPrimLong:
   5433       locations->SetInAt(0, Location::RequiresRegister());
   5434       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
   5435       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5436       break;
   5437 
   5438     case Primitive::kPrimFloat:
   5439     case Primitive::kPrimDouble: {
   5440       InvokeRuntimeCallingConvention calling_convention;
   5441       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
   5442       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
   5443       locations->SetOut(calling_convention.GetReturnLocation(type));
   5444 
   5445       break;
   5446     }
   5447 
   5448     default:
   5449       LOG(FATAL) << "Unexpected rem type " << type;
   5450   }
   5451 }
   5452 
   5453 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
   5454   Primitive::Type type = rem->GetResultType();
   5455 
   5456   switch (type) {
   5457     case Primitive::kPrimInt:
   5458     case Primitive::kPrimLong: {
   5459       GenerateDivRemIntegral(rem);
   5460       break;
   5461     }
   5462 
   5463     case Primitive::kPrimFloat:
   5464     case Primitive::kPrimDouble: {
   5465       QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod;
   5466       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
   5467       if (type == Primitive::kPrimFloat) {
   5468         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
   5469       } else {
   5470         CheckEntrypointTypes<kQuickFmod, double, double, double>();
   5471       }
   5472       break;
   5473     }
   5474 
   5475     default:
   5476       LOG(FATAL) << "Unexpected rem type " << type;
   5477       UNREACHABLE();
   5478   }
   5479 }
   5480 
   5481 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   5482   memory_barrier->SetLocations(nullptr);
   5483 }
   5484 
   5485 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   5486   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
   5487 }
   5488 
   5489 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
   5490   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   5491   Primitive::Type return_type = instruction->InputAt(0)->GetType();
   5492   locations->SetInAt(0, ARM64ReturnLocation(return_type));
   5493 }
   5494 
   5495 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
   5496   codegen_->GenerateFrameExit();
   5497 }
   5498 
   5499 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
   5500   instruction->SetLocations(nullptr);
   5501 }
   5502 
   5503 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
   5504   codegen_->GenerateFrameExit();
   5505 }
   5506 
   5507 void LocationsBuilderARM64::VisitRor(HRor* ror) {
   5508   HandleBinaryOp(ror);
   5509 }
   5510 
   5511 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
   5512   HandleBinaryOp(ror);
   5513 }
   5514 
   5515 void LocationsBuilderARM64::VisitShl(HShl* shl) {
   5516   HandleShift(shl);
   5517 }
   5518 
   5519 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
   5520   HandleShift(shl);
   5521 }
   5522 
   5523 void LocationsBuilderARM64::VisitShr(HShr* shr) {
   5524   HandleShift(shr);
   5525 }
   5526 
   5527 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
   5528   HandleShift(shr);
   5529 }
   5530 
   5531 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
   5532   HandleBinaryOp(instruction);
   5533 }
   5534 
   5535 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
   5536   HandleBinaryOp(instruction);
   5537 }
   5538 
   5539 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   5540   HandleFieldGet(instruction, instruction->GetFieldInfo());
   5541 }
   5542 
   5543 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   5544   HandleFieldGet(instruction, instruction->GetFieldInfo());
   5545 }
   5546 
   5547 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   5548   HandleFieldSet(instruction);
   5549 }
   5550 
   5551 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   5552   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   5553 }
   5554 
   5555 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
   5556     HUnresolvedInstanceFieldGet* instruction) {
   5557   FieldAccessCallingConventionARM64 calling_convention;
   5558   codegen_->CreateUnresolvedFieldLocationSummary(
   5559       instruction, instruction->GetFieldType(), calling_convention);
   5560 }
   5561 
   5562 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
   5563     HUnresolvedInstanceFieldGet* instruction) {
   5564   FieldAccessCallingConventionARM64 calling_convention;
   5565   codegen_->GenerateUnresolvedFieldAccess(instruction,
   5566                                           instruction->GetFieldType(),
   5567                                           instruction->GetFieldIndex(),
   5568                                           instruction->GetDexPc(),
   5569                                           calling_convention);
   5570 }
   5571 
   5572 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
   5573     HUnresolvedInstanceFieldSet* instruction) {
   5574   FieldAccessCallingConventionARM64 calling_convention;
   5575   codegen_->CreateUnresolvedFieldLocationSummary(
   5576       instruction, instruction->GetFieldType(), calling_convention);
   5577 }
   5578 
   5579 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
   5580     HUnresolvedInstanceFieldSet* instruction) {
   5581   FieldAccessCallingConventionARM64 calling_convention;
   5582   codegen_->GenerateUnresolvedFieldAccess(instruction,
   5583                                           instruction->GetFieldType(),
   5584                                           instruction->GetFieldIndex(),
   5585                                           instruction->GetDexPc(),
   5586                                           calling_convention);
   5587 }
   5588 
   5589 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
   5590     HUnresolvedStaticFieldGet* instruction) {
   5591   FieldAccessCallingConventionARM64 calling_convention;
   5592   codegen_->CreateUnresolvedFieldLocationSummary(
   5593       instruction, instruction->GetFieldType(), calling_convention);
   5594 }
   5595 
   5596 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
   5597     HUnresolvedStaticFieldGet* instruction) {
   5598   FieldAccessCallingConventionARM64 calling_convention;
   5599   codegen_->GenerateUnresolvedFieldAccess(instruction,
   5600                                           instruction->GetFieldType(),
   5601                                           instruction->GetFieldIndex(),
   5602                                           instruction->GetDexPc(),
   5603                                           calling_convention);
   5604 }
   5605 
   5606 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
   5607     HUnresolvedStaticFieldSet* instruction) {
   5608   FieldAccessCallingConventionARM64 calling_convention;
   5609   codegen_->CreateUnresolvedFieldLocationSummary(
   5610       instruction, instruction->GetFieldType(), calling_convention);
   5611 }
   5612 
   5613 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
   5614     HUnresolvedStaticFieldSet* instruction) {
   5615   FieldAccessCallingConventionARM64 calling_convention;
   5616   codegen_->GenerateUnresolvedFieldAccess(instruction,
   5617                                           instruction->GetFieldType(),
   5618                                           instruction->GetFieldIndex(),
   5619                                           instruction->GetDexPc(),
   5620                                           calling_convention);
   5621 }
   5622 
   5623 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5624   LocationSummary* locations =
   5625       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
   5626   // In suspend check slow path, usually there are no caller-save registers at all.
   5627   // If SIMD instructions are present, however, we force spilling all live SIMD
   5628   // registers in full width (since the runtime only saves/restores lower part).
   5629   locations->SetCustomSlowPathCallerSaves(
   5630       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
   5631 }
   5632 
   5633 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5634   HBasicBlock* block = instruction->GetBlock();
   5635   if (block->GetLoopInformation() != nullptr) {
   5636     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
   5637     // The back edge will generate the suspend check.
   5638     return;
   5639   }
   5640   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
   5641     // The goto will generate the suspend check.
   5642     return;
   5643   }
   5644   GenerateSuspendCheck(instruction, nullptr);
   5645 }
   5646 
   5647 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
   5648   LocationSummary* locations =
   5649       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   5650   InvokeRuntimeCallingConvention calling_convention;
   5651   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   5652 }
   5653 
   5654 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
   5655   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
   5656   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
   5657 }
   5658 
   5659 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
   5660   LocationSummary* locations =
   5661       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   5662   Primitive::Type input_type = conversion->GetInputType();
   5663   Primitive::Type result_type = conversion->GetResultType();
   5664   DCHECK_NE(input_type, result_type);
   5665   if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
   5666       (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
   5667     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
   5668   }
   5669 
   5670   if (Primitive::IsFloatingPointType(input_type)) {
   5671     locations->SetInAt(0, Location::RequiresFpuRegister());
   5672   } else {
   5673     locations->SetInAt(0, Location::RequiresRegister());
   5674   }
   5675 
   5676   if (Primitive::IsFloatingPointType(result_type)) {
   5677     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   5678   } else {
   5679     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5680   }
   5681 }
   5682 
   5683 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
   5684   Primitive::Type result_type = conversion->GetResultType();
   5685   Primitive::Type input_type = conversion->GetInputType();
   5686 
   5687   DCHECK_NE(input_type, result_type);
   5688 
   5689   if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
   5690     int result_size = Primitive::ComponentSize(result_type);
   5691     int input_size = Primitive::ComponentSize(input_type);
   5692     int min_size = std::min(result_size, input_size);
   5693     Register output = OutputRegister(conversion);
   5694     Register source = InputRegisterAt(conversion, 0);
   5695     if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
   5696       // 'int' values are used directly as W registers, discarding the top
   5697       // bits, so we don't need to sign-extend and can just perform a move.
    5698       // We deliberately do not pass the `kDiscardForSameWReg` argument, which
    5699       // forces clearing of the top 32 bits of the target register. We could in
    5700       // theory leave those bits unchanged, but we would have to make sure that
    5701       // no code uses a 32-bit input value as a 64-bit value assuming that the
    5702       // top 32 bits are zero.
   5703       __ Mov(output.W(), source.W());
   5704     } else if (result_type == Primitive::kPrimChar ||
   5705                (input_type == Primitive::kPrimChar && input_size < result_size)) {
   5706       __ Ubfx(output,
   5707               output.IsX() ? source.X() : source.W(),
   5708               0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
   5709     } else {
   5710       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
   5711     }
   5712   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
   5713     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
   5714   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
   5715     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
   5716     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
   5717   } else if (Primitive::IsFloatingPointType(result_type) &&
   5718              Primitive::IsFloatingPointType(input_type)) {
   5719     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
   5720   } else {
   5721     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
   5722                 << " to " << result_type;
   5723   }
   5724 }
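
// Illustrative sketch (not part of the original file): the Sbfx/Ubfx selection
// above mirrors the integral widening/narrowing semantics this visitor
// implements. A hypothetical, self-contained reference model of the two
// bitfield-extract forms, assuming two's complement; these names are
// illustrative only, not ART API.
namespace {  // Sketch only.
constexpr int64_t SignExtractSketch(int64_t value, int bits) {
  // Equivalent of SBFX <Rd>, <Rn>, #0, #bits: keep the low `bits` bits and
  // sign-extend them into the full register (the signed narrowing cases).
  const int64_t mask = (int64_t{1} << bits) - 1;
  const int64_t sign_bit = int64_t{1} << (bits - 1);
  return ((value & mask) ^ sign_bit) - sign_bit;
}
constexpr uint64_t ZeroExtractSketch(uint64_t value, int bits) {
  // Equivalent of UBFX: keep the low `bits` bits and clear the rest (the
  // char case, which is unsigned 16-bit).
  return value & ((uint64_t{1} << bits) - 1u);
}
static_assert(SignExtractSketch(0xFF80, 8) == -128, "narrowing to byte yields -128");
static_assert(ZeroExtractSketch(0xFFFF1234u, 16) == 0x1234u, "char zero-extends");
}  // namespace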
   5725 
   5726 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
   5727   HandleShift(ushr);
   5728 }
   5729 
   5730 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
   5731   HandleShift(ushr);
   5732 }
   5733 
   5734 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
   5735   HandleBinaryOp(instruction);
   5736 }
   5737 
   5738 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
   5739   HandleBinaryOp(instruction);
   5740 }
   5741 
   5742 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   5743   // Nothing to do, this should be removed during prepare for register allocator.
   5744   LOG(FATAL) << "Unreachable";
   5745 }
   5746 
   5747 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   5748   // Nothing to do, this should be removed during prepare for register allocator.
   5749   LOG(FATAL) << "Unreachable";
   5750 }
   5751 
   5752 // Simple implementation of packed switch - generate cascaded compare/jumps.
   5753 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   5754   LocationSummary* locations =
   5755       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   5756   locations->SetInAt(0, Location::RequiresRegister());
   5757 }
   5758 
   5759 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   5760   int32_t lower_bound = switch_instr->GetStartValue();
   5761   uint32_t num_entries = switch_instr->GetNumEntries();
   5762   Register value_reg = InputRegisterAt(switch_instr, 0);
   5763   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   5764 
    5765   // Roughly assume a maximum of 16 generated instructions per HIR on average.
   5766   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
    5767   // ADR has a limited range (+/-1MB), so we set a threshold on the number of HIRs in the
    5768   // graph to make sure we do not emit a jump table whose target may be out of range.
   5769   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
   5770   // ranges and emit the tables only as required.
    5771   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
   5772 
   5773   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
    5774       // The current instruction id is an upper bound on the number of HIRs in the graph.
   5775       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
   5776     // Create a series of compare/jumps.
   5777     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   5778     Register temp = temps.AcquireW();
   5779     __ Subs(temp, value_reg, Operand(lower_bound));
   5780 
   5781     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   5782     // Jump to successors[0] if value == lower_bound.
   5783     __ B(eq, codegen_->GetLabelOf(successors[0]));
   5784     int32_t last_index = 0;
   5785     for (; num_entries - last_index > 2; last_index += 2) {
   5786       __ Subs(temp, temp, Operand(2));
   5787       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
   5788       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
   5789       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
   5790       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
   5791     }
   5792     if (num_entries - last_index == 2) {
    5793       // Handle the last case value, which the loop above did not cover.
   5794       __ Cmp(temp, Operand(1));
   5795       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
   5796     }
   5797 
   5798     // And the default for any other value.
   5799     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
   5800       __ B(codegen_->GetLabelOf(default_block));
   5801     }
   5802   } else {
   5803     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
   5804 
   5805     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   5806 
    5807     // The instructions below should use at most one blocked register. Since there
    5808     // are two blocked registers, we are free to block one.
   5809     Register temp_w = temps.AcquireW();
   5810     Register index;
   5811     // Remove the bias.
   5812     if (lower_bound != 0) {
   5813       index = temp_w;
   5814       __ Sub(index, value_reg, Operand(lower_bound));
   5815     } else {
   5816       index = value_reg;
   5817     }
   5818 
    5819     // Jump to the default block if the index is out of range.
   5820     __ Cmp(index, Operand(num_entries));
   5821     __ B(hs, codegen_->GetLabelOf(default_block));
   5822 
    5823     // In the current VIXL implementation, encoding the immediate value for Adr does
    5824     // not require any blocked registers, so we are free to use both VIXL blocked
    5825     // registers to reduce register pressure.
   5826     Register table_base = temps.AcquireX();
   5827     // Load jump offset from the table.
   5828     __ Adr(table_base, jump_table->GetTableStartLabel());
   5829     Register jump_offset = temp_w;
   5830     __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
   5831 
    5832     // Jump to the target block by branching to table_base (PC-relative) + offset.
   5833     Register target_address = table_base;
   5834     __ Add(target_address, table_base, Operand(jump_offset, SXTW));
   5835     __ Br(target_address);
   5836   }
   5837 }
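
// Illustrative sketch (not part of the original file): counting the
// instructions emitted above, the compare/jump cascade costs about 3
// instructions per 2 entries plus a small preamble, while the table path
// costs a fixed 7 instructions (Sub/Cmp/B/Adr/Ldr/Add/Br) plus one 32-bit
// table word per entry. A minimal model of that trade-off, under those cost
// assumptions; the helpers are illustrative, not ART API.
namespace {  // Sketch only.
constexpr uint32_t CompareJumpCostSketch(uint32_t num_entries) {
  return (3u * num_entries) / 2u + 3u;  // ~1.5 instructions/entry + preamble.
}
constexpr uint32_t JumpTableCostSketch(uint32_t num_entries) {
  return 7u + num_entries;  // 7 instructions + num_entries table words.
}
// For single-digit entry counts the cascade emits no more code/data:
static_assert(CompareJumpCostSketch(7u) <= JumpTableCostSketch(7u),
              "compare/jump cascade is preferred for small switches");
}  // namespace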
   5838 
   5839 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
   5840     HInstruction* instruction,
   5841     Location out,
   5842     uint32_t offset,
   5843     Location maybe_temp,
   5844     ReadBarrierOption read_barrier_option) {
   5845   Primitive::Type type = Primitive::kPrimNot;
   5846   Register out_reg = RegisterFrom(out, type);
   5847   if (read_barrier_option == kWithReadBarrier) {
   5848     CHECK(kEmitCompilerReadBarrier);
   5849     if (kUseBakerReadBarrier) {
   5850       // Load with fast path based Baker's read barrier.
   5851       // /* HeapReference<Object> */ out = *(out + offset)
   5852       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
   5853                                                       out,
   5854                                                       out_reg,
   5855                                                       offset,
   5856                                                       maybe_temp,
   5857                                                       /* needs_null_check */ false,
   5858                                                       /* use_load_acquire */ false);
   5859     } else {
   5860       // Load with slow path based read barrier.
   5861       // Save the value of `out` into `maybe_temp` before overwriting it
   5862       // in the following move operation, as we will need it for the
   5863       // read barrier below.
   5864       Register temp_reg = RegisterFrom(maybe_temp, type);
   5865       __ Mov(temp_reg, out_reg);
   5866       // /* HeapReference<Object> */ out = *(out + offset)
   5867       __ Ldr(out_reg, HeapOperand(out_reg, offset));
   5868       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
   5869     }
   5870   } else {
   5871     // Plain load with no read barrier.
   5872     // /* HeapReference<Object> */ out = *(out + offset)
   5873     __ Ldr(out_reg, HeapOperand(out_reg, offset));
   5874     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
   5875   }
   5876 }
   5877 
   5878 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
   5879     HInstruction* instruction,
   5880     Location out,
   5881     Location obj,
   5882     uint32_t offset,
   5883     Location maybe_temp,
   5884     ReadBarrierOption read_barrier_option) {
   5885   Primitive::Type type = Primitive::kPrimNot;
   5886   Register out_reg = RegisterFrom(out, type);
   5887   Register obj_reg = RegisterFrom(obj, type);
   5888   if (read_barrier_option == kWithReadBarrier) {
   5889     CHECK(kEmitCompilerReadBarrier);
   5890     if (kUseBakerReadBarrier) {
   5891       // Load with fast path based Baker's read barrier.
   5892       // /* HeapReference<Object> */ out = *(obj + offset)
   5893       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
   5894                                                       out,
   5895                                                       obj_reg,
   5896                                                       offset,
   5897                                                       maybe_temp,
   5898                                                       /* needs_null_check */ false,
   5899                                                       /* use_load_acquire */ false);
   5900     } else {
   5901       // Load with slow path based read barrier.
   5902       // /* HeapReference<Object> */ out = *(obj + offset)
   5903       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
   5904       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
   5905     }
   5906   } else {
   5907     // Plain load with no read barrier.
   5908     // /* HeapReference<Object> */ out = *(obj + offset)
   5909     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
   5910     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
   5911   }
   5912 }
   5913 
   5914 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
   5915     HInstruction* instruction,
   5916     Location root,
   5917     Register obj,
   5918     uint32_t offset,
   5919     vixl::aarch64::Label* fixup_label,
   5920     ReadBarrierOption read_barrier_option) {
   5921   DCHECK(fixup_label == nullptr || offset == 0u);
   5922   Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
   5923   if (read_barrier_option == kWithReadBarrier) {
   5924     DCHECK(kEmitCompilerReadBarrier);
   5925     if (kUseBakerReadBarrier) {
   5926       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
    5927       // Baker's read barriers are used.
   5928       if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
   5929           !Runtime::Current()->UseJitCompilation()) {
   5930         // Note that we do not actually check the value of `GetIsGcMarking()`
   5931         // to decide whether to mark the loaded GC root or not.  Instead, we
   5932         // load into `temp` the read barrier mark introspection entrypoint.
   5933         // If `temp` is null, it means that `GetIsGcMarking()` is false, and
   5934         // vice versa.
   5935         //
   5936         // We use link-time generated thunks for the slow path. That thunk
   5937         // checks the reference and jumps to the entrypoint if needed.
   5938         //
   5939         //     temp = Thread::Current()->pReadBarrierMarkIntrospection
   5940         //     lr = &return_address;
   5941         //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
   5942         //     if (temp != nullptr) {
   5943         //        goto gc_root_thunk<root_reg>(lr)
   5944         //     }
   5945         //   return_address:
   5946 
   5947         UseScratchRegisterScope temps(GetVIXLAssembler());
   5948         DCHECK(temps.IsAvailable(ip0));
   5949         DCHECK(temps.IsAvailable(ip1));
   5950         temps.Exclude(ip0, ip1);
   5951         uint32_t custom_data =
   5952             linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
   5953         vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
   5954 
   5955         // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
   5956         DCHECK_EQ(ip0.GetCode(), 16u);
   5957         const int32_t entry_point_offset =
   5958             CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
   5959         __ Ldr(ip1, MemOperand(tr, entry_point_offset));
   5960         EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
   5961         vixl::aarch64::Label return_address;
   5962         __ adr(lr, &return_address);
   5963         if (fixup_label != nullptr) {
   5964           __ Bind(fixup_label);
   5965         }
   5966         static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
    5967                       "GC root LDR must be 2 instructions (8B) before the return address label.");
   5968         __ ldr(root_reg, MemOperand(obj.X(), offset));
   5969         __ Bind(cbnz_label);
   5970         __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
   5971         __ Bind(&return_address);
   5972       } else {
   5973         // Note that we do not actually check the value of
   5974         // `GetIsGcMarking()` to decide whether to mark the loaded GC
   5975         // root or not.  Instead, we load into `temp` the read barrier
   5976         // mark entry point corresponding to register `root`. If `temp`
   5977         // is null, it means that `GetIsGcMarking()` is false, and vice
   5978         // versa.
   5979         //
   5980         //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
   5981         //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
   5982         //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
   5983         //     // Slow path.
   5984         //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
   5985         //   }
   5986 
   5987         // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
   5988         Register temp = lr;
   5989         SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
   5990             instruction, root, /* entrypoint */ LocationFrom(temp));
   5991         codegen_->AddSlowPath(slow_path);
   5992 
   5993         // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
   5994         const int32_t entry_point_offset =
   5995             CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
   5996         // Loading the entrypoint does not require a load acquire since it is only changed when
   5997         // threads are suspended or running a checkpoint.
   5998         __ Ldr(temp, MemOperand(tr, entry_point_offset));
   5999 
   6000         // /* GcRoot<mirror::Object> */ root = *(obj + offset)
   6001         if (fixup_label == nullptr) {
   6002           __ Ldr(root_reg, MemOperand(obj, offset));
   6003         } else {
   6004           codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
   6005         }
   6006         static_assert(
   6007             sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
   6008             "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
   6009             "have different sizes.");
   6010         static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
   6011                       "art::mirror::CompressedReference<mirror::Object> and int32_t "
   6012                       "have different sizes.");
   6013 
    6014         // The entrypoint is null when the GC is not marking; this saves one load
    6015         // compared to checking GetIsGcMarking directly.
   6016         __ Cbnz(temp, slow_path->GetEntryLabel());
   6017         __ Bind(slow_path->GetExitLabel());
   6018       }
   6019     } else {
   6020       // GC root loaded through a slow path for read barriers other
   6021       // than Baker's.
   6022       // /* GcRoot<mirror::Object>* */ root = obj + offset
   6023       if (fixup_label == nullptr) {
   6024         __ Add(root_reg.X(), obj.X(), offset);
   6025       } else {
   6026         codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
   6027       }
   6028       // /* mirror::Object* */ root = root->Read()
   6029       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
   6030     }
   6031   } else {
   6032     // Plain GC root load with no read barrier.
   6033     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
   6034     if (fixup_label == nullptr) {
   6035       __ Ldr(root_reg, MemOperand(obj, offset));
   6036     } else {
   6037       codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
   6038     }
   6039     // Note that GC roots are not affected by heap poisoning, thus we
   6040     // do not have to unpoison `root_reg` here.
   6041   }
   6042 }
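
// Illustrative sketch (not part of the original file): both marking paths
// above fetch a per-register entrypoint from the running thread. Assuming the
// pReadBarrierMarkRegNN entrypoints form a contiguous array of pointer-sized
// slots (which is what the register-code indexing above relies on), the offset
// computation reduces to a multiply-add. The base value below is a placeholder,
// not the real Thread-layout constant.
namespace {  // Sketch only.
constexpr int32_t kFirstMarkEntrypointOffsetSketch = 0x600;  // Hypothetical base.
constexpr int32_t MarkEntrypointOffsetSketch(size_t reg_code) {
  // Each slot is 8 bytes on ARM64 (kArm64PointerSize).
  return kFirstMarkEntrypointOffsetSketch + static_cast<int32_t>(reg_code * 8u);
}
static_assert(MarkEntrypointOffsetSketch(16u) - MarkEntrypointOffsetSketch(0u) == 128,
              "register 16 (ip0) lives 16 pointer-sized slots past register 0");
}  // namespace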
   6043 
   6044 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
   6045                                                                Location ref,
   6046                                                                Register obj,
   6047                                                                uint32_t offset,
   6048                                                                Location maybe_temp,
   6049                                                                bool needs_null_check,
   6050                                                                bool use_load_acquire) {
   6051   DCHECK(kEmitCompilerReadBarrier);
   6052   DCHECK(kUseBakerReadBarrier);
   6053 
   6054   if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
   6055       !use_load_acquire &&
   6056       !Runtime::Current()->UseJitCompilation()) {
   6057     // Note that we do not actually check the value of `GetIsGcMarking()`
    6058     // to decide whether to mark the loaded reference or not.  Instead, we
   6059     // load into `temp` the read barrier mark introspection entrypoint.
   6060     // If `temp` is null, it means that `GetIsGcMarking()` is false, and
   6061     // vice versa.
   6062     //
   6063     // We use link-time generated thunks for the slow path. That thunk checks
   6064     // the holder and jumps to the entrypoint if needed. If the holder is not
   6065     // gray, it creates a fake dependency and returns to the LDR instruction.
   6066     //
   6067     //     temp = Thread::Current()->pReadBarrierMarkIntrospection
   6068     //     lr = &return_address;
   6069     //     if (temp != nullptr) {
   6070     //        goto field_thunk<holder_reg, base_reg>(lr)
   6071     //     }
   6072     //   not_gray_return_address:
   6073     //     // Original reference load. If the offset is too large to fit
   6074     //     // into LDR, we use an adjusted base register here.
   6075     //     GcRoot<mirror::Object> root = *(obj+offset);
   6076     //   gray_return_address:
   6077 
   6078     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
   6079     Register base = obj;
   6080     if (offset >= kReferenceLoadMinFarOffset) {
   6081       DCHECK(maybe_temp.IsRegister());
   6082       base = WRegisterFrom(maybe_temp);
   6083       static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
   6084       __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
   6085       offset &= (kReferenceLoadMinFarOffset - 1u);
   6086     }
   6087     UseScratchRegisterScope temps(GetVIXLAssembler());
   6088     DCHECK(temps.IsAvailable(ip0));
   6089     DCHECK(temps.IsAvailable(ip1));
   6090     temps.Exclude(ip0, ip1);
   6091     uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
   6092         base.GetCode(),
   6093         obj.GetCode());
   6094     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
   6095 
   6096     // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
   6097     DCHECK_EQ(ip0.GetCode(), 16u);
   6098     const int32_t entry_point_offset =
   6099         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
   6100     __ Ldr(ip1, MemOperand(tr, entry_point_offset));
   6101     EmissionCheckScope guard(GetVIXLAssembler(),
   6102                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
   6103     vixl::aarch64::Label return_address;
   6104     __ adr(lr, &return_address);
   6105     __ Bind(cbnz_label);
   6106     __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
   6107     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
   6108                   "Field LDR must be 1 instruction (4B) before the return address label; "
    6109                   "2 instructions (8B) for heap poisoning.");
   6110     Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
   6111     __ ldr(ref_reg, MemOperand(base.X(), offset));
   6112     if (needs_null_check) {
   6113       MaybeRecordImplicitNullCheck(instruction);
   6114     }
   6115     GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
   6116     __ Bind(&return_address);
   6117     return;
   6118   }
   6119 
   6120   // /* HeapReference<Object> */ ref = *(obj + offset)
   6121   Register temp = WRegisterFrom(maybe_temp);
   6122   Location no_index = Location::NoLocation();
   6123   size_t no_scale_factor = 0u;
   6124   GenerateReferenceLoadWithBakerReadBarrier(instruction,
   6125                                             ref,
   6126                                             obj,
   6127                                             offset,
   6128                                             no_index,
   6129                                             no_scale_factor,
   6130                                             temp,
   6131                                             needs_null_check,
   6132                                             use_load_acquire);
   6133 }
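
// Illustrative sketch (not part of the original file): the far-offset handling
// above splits a large field offset into an aligned base adjustment plus a
// small residue, so the residue fits the LDR immediate form expected by the
// link-time thunk. With a power-of-two bound the split is pure masking; the
// bound value below is an assumption, not the real kReferenceLoadMinFarOffset.
namespace {  // Sketch only.
constexpr uint32_t kMinFarOffsetSketch = 4096u;  // Assumed power-of-two bound.
constexpr uint32_t FarOffsetBaseSketch(uint32_t offset) {
  return offset & ~(kMinFarOffsetSketch - 1u);  // Folded into `base` via Add.
}
constexpr uint32_t FarOffsetResidueSketch(uint32_t offset) {
  return offset & (kMinFarOffsetSketch - 1u);   // Kept as the LDR immediate.
}
static_assert(FarOffsetBaseSketch(0x1234u) + FarOffsetResidueSketch(0x1234u) == 0x1234u,
              "the two parts recompose the original offset");
}  // namespace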
   6134 
   6135 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
   6136                                                                Location ref,
   6137                                                                Register obj,
   6138                                                                uint32_t data_offset,
   6139                                                                Location index,
   6140                                                                Register temp,
   6141                                                                bool needs_null_check) {
   6142   DCHECK(kEmitCompilerReadBarrier);
   6143   DCHECK(kUseBakerReadBarrier);
   6144 
    6145   // Array cells are never volatile variables; therefore, array loads
    6146   // never use Load-Acquire instructions on ARM64.
   6147   const bool use_load_acquire = false;
   6148 
   6149   static_assert(
   6150       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   6151       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   6152   // /* HeapReference<Object> */ ref =
   6153   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   6154   size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
   6155   GenerateReferenceLoadWithBakerReadBarrier(instruction,
   6156                                             ref,
   6157                                             obj,
   6158                                             data_offset,
   6159                                             index,
   6160                                             scale_factor,
   6161                                             temp,
   6162                                             needs_null_check,
   6163                                             use_load_acquire);
   6164 }
   6165 
   6166 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
   6167                                                                    Location ref,
   6168                                                                    Register obj,
   6169                                                                    uint32_t offset,
   6170                                                                    Location index,
   6171                                                                    size_t scale_factor,
   6172                                                                    Register temp,
   6173                                                                    bool needs_null_check,
   6174                                                                    bool use_load_acquire,
   6175                                                                    bool always_update_field) {
   6176   DCHECK(kEmitCompilerReadBarrier);
   6177   DCHECK(kUseBakerReadBarrier);
   6178   // If we are emitting an array load, we should not be using a
   6179   // Load Acquire instruction.  In other words:
   6180   // `instruction->IsArrayGet()` => `!use_load_acquire`.
   6181   DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
   6182 
   6183   // Query `art::Thread::Current()->GetIsGcMarking()` to decide
   6184   // whether we need to enter the slow path to mark the reference.
   6185   // Then, in the slow path, check the gray bit in the lock word of
   6186   // the reference's holder (`obj`) to decide whether to mark `ref` or
   6187   // not.
   6188   //
   6189   // Note that we do not actually check the value of `GetIsGcMarking()`;
   6190   // instead, we load into `temp2` the read barrier mark entry point
   6191   // corresponding to register `ref`. If `temp2` is null, it means
   6192   // that `GetIsGcMarking()` is false, and vice versa.
   6193   //
   6194   //   temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
   6195   //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
   6196   //     // Slow path.
   6197   //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   6198   //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   6199   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   6200   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   6201   //     if (is_gray) {
   6202   //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
   6203   //     }
   6204   //   } else {
   6205   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   6206   //   }
   6207 
   6208   // Slow path marking the object `ref` when the GC is marking. The
   6209   // entrypoint will already be loaded in `temp2`.
   6210   Register temp2 = lr;
   6211   Location temp2_loc = LocationFrom(temp2);
   6212   SlowPathCodeARM64* slow_path;
   6213   if (always_update_field) {
   6214     // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
   6215     // only supports address of the form `obj + field_offset`, where
   6216     // `obj` is a register and `field_offset` is a register. Thus
    6217     // `offset` and `scale_factor` above are expected to be zero in
    6218     // this code path.
   6219     DCHECK_EQ(offset, 0u);
   6220     DCHECK_EQ(scale_factor, 0u);  /* "times 1" */
   6221     Location field_offset = index;
   6222     slow_path =
   6223         new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
   6224             instruction,
   6225             ref,
   6226             obj,
   6227             offset,
   6228             /* index */ field_offset,
   6229             scale_factor,
   6230             needs_null_check,
   6231             use_load_acquire,
   6232             temp,
   6233             /* entrypoint */ temp2_loc);
   6234   } else {
   6235     slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
   6236         instruction,
   6237         ref,
   6238         obj,
   6239         offset,
   6240         index,
   6241         scale_factor,
   6242         needs_null_check,
   6243         use_load_acquire,
   6244         temp,
   6245         /* entrypoint */ temp2_loc);
   6246   }
   6247   AddSlowPath(slow_path);
   6248 
   6249   // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
   6250   const int32_t entry_point_offset =
   6251       CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
   6252   // Loading the entrypoint does not require a load acquire since it is only changed when
   6253   // threads are suspended or running a checkpoint.
   6254   __ Ldr(temp2, MemOperand(tr, entry_point_offset));
    6255   // The entrypoint is null when the GC is not marking; this saves one load
    6256   // compared to checking GetIsGcMarking directly.
   6257   __ Cbnz(temp2, slow_path->GetEntryLabel());
   6258   // Fast path: just load the reference.
   6259   GenerateRawReferenceLoad(
   6260       instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
   6261   __ Bind(slow_path->GetExitLabel());
   6262 }
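
// Illustrative sketch (not part of the original file): the Cbnz on the loaded
// entrypoint above fuses "is the GC marking?" with "where is the mark routine?"
// into a single load. A hypothetical C++ rendering of the emitted control flow,
// mirroring the pseudo-code comment at the top of the function; MarkFnSketch
// stands in for the runtime entrypoint.
namespace {  // Sketch only.
using MarkFnSketch = void* (*)(void*);
inline void* ReferenceLoadModelSketch(void** slot,
                                      MarkFnSketch mark_entrypoint,
                                      bool holder_is_gray) {
  if (mark_entrypoint == nullptr) {
    return *slot;                  // Fast path: GC not marking, plain load.
  }
  void* ref = *slot;               // Slow path: original reference load.
  if (holder_is_gray) {            // Gray bit in the holder's lock word.
    ref = mark_entrypoint(ref);    // ref = ReadBarrier::Mark(ref).
  }
  return ref;
}
}  // namespace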
   6263 
   6264 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
   6265                                                   Location ref,
   6266                                                   Register obj,
   6267                                                   uint32_t offset,
   6268                                                   Location index,
   6269                                                   size_t scale_factor,
   6270                                                   bool needs_null_check,
   6271                                                   bool use_load_acquire) {
   6272   DCHECK(obj.IsW());
   6273   Primitive::Type type = Primitive::kPrimNot;
   6274   Register ref_reg = RegisterFrom(ref, type);
   6275 
   6276   // If needed, vixl::EmissionCheckScope guards are used to ensure
   6277   // that no pools are emitted between the load (macro) instruction
   6278   // and MaybeRecordImplicitNullCheck.
   6279 
   6280   if (index.IsValid()) {
   6281     // Load types involving an "index": ArrayGet,
   6282     // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
   6283     // intrinsics.
   6284     if (use_load_acquire) {
   6285       // UnsafeGetObjectVolatile intrinsic case.
   6286       // Register `index` is not an index in an object array, but an
   6287       // offset to an object reference field within object `obj`.
   6288       DCHECK(instruction->IsInvoke()) << instruction->DebugName();
   6289       DCHECK(instruction->GetLocations()->Intrinsified());
   6290       DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
   6291           << instruction->AsInvoke()->GetIntrinsic();
   6292       DCHECK_EQ(offset, 0u);
   6293       DCHECK_EQ(scale_factor, 0u);
   6294       DCHECK_EQ(needs_null_check, false);
   6295       // /* HeapReference<mirror::Object> */ ref = *(obj + index)
   6296       MemOperand field = HeapOperand(obj, XRegisterFrom(index));
   6297       LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
   6298     } else {
   6299       // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
   6300       // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
   6301       if (index.IsConstant()) {
   6302         uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
   6303         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   6304         Load(type, ref_reg, HeapOperand(obj, computed_offset));
   6305         if (needs_null_check) {
   6306           MaybeRecordImplicitNullCheck(instruction);
   6307         }
   6308       } else {
   6309         UseScratchRegisterScope temps(GetVIXLAssembler());
   6310         Register temp = temps.AcquireW();
   6311         __ Add(temp, obj, offset);
   6312         {
   6313           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   6314           Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
   6315           if (needs_null_check) {
   6316             MaybeRecordImplicitNullCheck(instruction);
   6317           }
   6318         }
   6319       }
   6320     }
   6321   } else {
   6322     // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
   6323     MemOperand field = HeapOperand(obj, offset);
   6324     if (use_load_acquire) {
   6325       // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
   6326       LoadAcquire(instruction, ref_reg, field, needs_null_check);
   6327     } else {
   6328       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
   6329       Load(type, ref_reg, field);
   6330       if (needs_null_check) {
   6331         MaybeRecordImplicitNullCheck(instruction);
   6332       }
   6333     }
   6334   }
   6335 
   6336   // Object* ref = ref_addr->AsMirrorPtr()
   6337   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
   6338 }
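
// Illustrative sketch (not part of the original file): for the constant-index
// array path above, the effective address folds into a single immediate. For
// reference arrays the scale factor is log2(sizeof(HeapReference)) == 2, and
// the usual object-array data offset is 12 bytes (class + monitor + length).
namespace {  // Sketch only.
constexpr uint32_t ComputedOffsetSketch(uint32_t data_offset,
                                        int64_t index,
                                        size_t scale_factor) {
  // Matches `offset + (Int64ConstantFrom(index) << scale_factor)` above.
  return data_offset + static_cast<uint32_t>(index << scale_factor);
}
static_assert(ComputedOffsetSketch(12u, 3, 2u) == 24u,
              "element 3 of an object array sits at 12 + (3 << 2) bytes");
}  // namespace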
   6339 
   6340 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
   6341                                                  Location out,
   6342                                                  Location ref,
   6343                                                  Location obj,
   6344                                                  uint32_t offset,
   6345                                                  Location index) {
   6346   DCHECK(kEmitCompilerReadBarrier);
   6347 
   6348   // Insert a slow path based read barrier *after* the reference load.
   6349   //
   6350   // If heap poisoning is enabled, the unpoisoning of the loaded
   6351   // reference will be carried out by the runtime within the slow
   6352   // path.
   6353   //
   6354   // Note that `ref` currently does not get unpoisoned (when heap
   6355   // poisoning is enabled), which is alright as the `ref` argument is
   6356   // not used by the artReadBarrierSlow entry point.
   6357   //
   6358   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
   6359   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
   6360       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
   6361   AddSlowPath(slow_path);
   6362 
   6363   __ B(slow_path->GetEntryLabel());
   6364   __ Bind(slow_path->GetExitLabel());
   6365 }
   6366 
   6367 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
   6368                                                       Location out,
   6369                                                       Location ref,
   6370                                                       Location obj,
   6371                                                       uint32_t offset,
   6372                                                       Location index) {
   6373   if (kEmitCompilerReadBarrier) {
   6374     // Baker's read barriers shall be handled by the fast path
   6375     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
   6376     DCHECK(!kUseBakerReadBarrier);
   6377     // If heap poisoning is enabled, unpoisoning will be taken care of
   6378     // by the runtime within the slow path.
   6379     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   6380   } else if (kPoisonHeapReferences) {
   6381     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
   6382   }
   6383 }
   6384 
   6385 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
   6386                                                         Location out,
   6387                                                         Location root) {
   6388   DCHECK(kEmitCompilerReadBarrier);
   6389 
   6390   // Insert a slow path based read barrier *after* the GC root load.
   6391   //
   6392   // Note that GC roots are not affected by heap poisoning, so we do
   6393   // not need to do anything special for this here.
   6394   SlowPathCodeARM64* slow_path =
   6395       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
   6396   AddSlowPath(slow_path);
   6397 
   6398   __ B(slow_path->GetEntryLabel());
   6399   __ Bind(slow_path->GetExitLabel());
   6400 }
   6401 
   6402 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
   6403   LocationSummary* locations =
   6404       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   6405   locations->SetInAt(0, Location::RequiresRegister());
   6406   locations->SetOut(Location::RequiresRegister());
   6407 }
   6408 
   6409 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
   6410   LocationSummary* locations = instruction->GetLocations();
   6411   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
   6412     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
   6413         instruction->GetIndex(), kArm64PointerSize).SizeValue();
   6414     __ Ldr(XRegisterFrom(locations->Out()),
   6415            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
   6416   } else {
   6417     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   6418         instruction->GetIndex(), kArm64PointerSize));
   6419     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
   6420         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
   6421     __ Ldr(XRegisterFrom(locations->Out()),
   6422            MemOperand(XRegisterFrom(locations->Out()), method_offset));
   6423   }
   6424 }
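
// Illustrative sketch (not part of the original file): the vtable path above
// turns a method index into a direct offset inside the mirror::Class object.
// Assuming the embedded vtable starts at a fixed header offset, the per-entry
// math is a multiply-add; the start value below is a placeholder, not the real
// layout constant.
namespace {  // Sketch only.
constexpr uint32_t kEmbeddedVTableStartSketch = 0x70u;  // Hypothetical start.
constexpr uint32_t kArm64MethodSlotSizeSketch = 8u;     // 64-bit entry size.
constexpr uint32_t VTableEntryOffsetSketch(uint32_t index) {
  return kEmbeddedVTableStartSketch + index * kArm64MethodSlotSizeSketch;
}
static_assert(VTableEntryOffsetSketch(2u) == 0x80u, "entry 2 is two slots in");
}  // namespace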
   6425 
   6426 static void PatchJitRootUse(uint8_t* code,
   6427                             const uint8_t* roots_data,
   6428                             vixl::aarch64::Literal<uint32_t>* literal,
   6429                             uint64_t index_in_table) {
   6430   uint32_t literal_offset = literal->GetOffset();
   6431   uintptr_t address =
   6432       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
   6433   uint8_t* data = code + literal_offset;
   6434   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
   6435 }
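
// Illustrative sketch (not part of the original file): the address computation
// in PatchJitRootUse indexes a table of 4-byte GcRoot slots, as the
// static_asserts earlier in this file guarantee. A worked instance of that
// arithmetic:
namespace {  // Sketch only.
constexpr uintptr_t RootSlotAddressSketch(uintptr_t roots_data, uint64_t index) {
  return roots_data + static_cast<uintptr_t>(index) * sizeof(uint32_t);
}
static_assert(RootSlotAddressSketch(0x1000u, 2u) == 0x1008u,
              "slot 2 lives 8 bytes past the start of the roots table");
}  // namespace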
   6436 
   6437 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   6438   for (const auto& entry : jit_string_patches_) {
   6439     const auto& it = jit_string_roots_.find(entry.first);
   6440     DCHECK(it != jit_string_roots_.end());
   6441     PatchJitRootUse(code, roots_data, entry.second, it->second);
   6442   }
   6443   for (const auto& entry : jit_class_patches_) {
   6444     const auto& it = jit_class_roots_.find(entry.first);
   6445     DCHECK(it != jit_class_roots_.end());
   6446     PatchJitRootUse(code, roots_data, entry.second, it->second);
   6447   }
   6448 }
   6449 
   6450 #undef __
   6451 #undef QUICK_ENTRY_POINT
   6452 
   6453 }  // namespace arm64
   6454 }  // namespace art
   6455