/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/arm64/relative_patcher_arm64.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "lock_word.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using vixl::EmissionCheckScope;

#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif

namespace art {

template<class MirrorType>
class GcRoot;

namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterAt;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
// jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
// generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// Reference loads (except object array loads) use LDR Wt, [Xn, #offset], which can handle
// offsets < 16KiB. For offsets >= 16KiB, the load is emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;

// Flags controlling the use of link-time generated thunks for Baker read barriers.
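// Each flag independently enables the thunk-based code path for one kind of reference load:
// field loads, object array loads, and GC root loads, respectively.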
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;

// Some instructions have special requirements for a temporary, for example
// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
// a temp that's not R0 (to avoid an extra move), and Baker read barrier field
// loads with large offsets need a fixed register to limit the number of link-time
// thunks we generate. For these and similar cases, we want to reserve a specific
// register that's neither callee-save nor an argument register. We choose x15.
inline Location FixedTempLocation() {
  return Location::RegisterLocation(x15.GetCode());
}

inline Condition ARM64Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
  // The ARM64 condition codes can express all the necessary branches, see the
  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

Location ARM64ReturnLocation(Primitive::Type return_type) {
  // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
  // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
  // but we use the exact registers for clarity.
  if (return_type == Primitive::kPrimFloat) {
    return LocationFrom(s0);
  } else if (return_type == Primitive::kPrimDouble) {
    return LocationFrom(d0);
  } else if (return_type == Primitive::kPrimLong) {
    return LocationFrom(x0);
  } else if (return_type == Primitive::kPrimVoid) {
    return Location::NoLocation();
  } else {
    return LocationFrom(w0);
  }
}

Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) {
  return ARM64ReturnLocation(return_type);
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()

// Calculate memory accessing operand for save/restore live registers.
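// The core registers are saved/restored first, with the FP registers following at
// `spill_offset + core_spill_size`; if the resulting offsets do not fit the immediate field of
// the load/store pair instructions, an alternate base register is set up below.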
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset,
                                           bool is_save) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
                                         codegen->GetNumberOfCoreRegisters(),
                                         fp_spills,
                                         codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);

  MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate register
    // to compute the base address (the spill base address of the floating point registers).
    Register new_base = temps.AcquireSameSizeAs(base);
    __ Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
    __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
    __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}

void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kXRegSizeInBytes;
  }

  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kDRegSizeInBytes;
  }

  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
}

void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
}

class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
        locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
};

class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
};

class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  LoadClassSlowPathARM64(HLoadClass* cls,
                         HInstruction* at,
                         uint32_t dex_pc,
                         bool do_clinit,
                         vixl::aarch64::Register bss_entry_temp = vixl::aarch64::Register(),
                         vixl::aarch64::Label* bss_entry_adrp_label = nullptr)
      : SlowPathCodeARM64(at),
        cls_(cls),
        dex_pc_(dex_pc),
        do_clinit_(do_clinit),
        bss_entry_temp_(bss_entry_temp),
        bss_entry_adrp_label_(bss_entry_adrp_label) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    InvokeRuntimeCallingConvention calling_convention;
    // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp
    // register; make sure it's not clobbered by the call or by saving/restoring registers.
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
    bool is_load_class_bss_entry =
        (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
    if (is_load_class_bss_entry) {
      DCHECK(bss_entry_temp_.IsValid());
      DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0)));
      DCHECK(
          !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_));
    }

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    dex::TypeIndex type_index = cls_->GetTypeIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
    QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                : kQuickInitializeType;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      Primitive::Type type = instruction_->GetType();
      arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
    }
    RestoreLiveRegisters(codegen, locations);
    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
    if (is_load_class_bss_entry) {
      DCHECK(out.IsValid());
      const DexFile& dex_file = cls_->GetDexFile();
      if (call_saves_everything_except_r0_ip0) {
        // The class entry page address was preserved in bss_entry_temp_ thanks to kSaveEverything.
      } else {
        // For non-Baker read barrier, we need to re-calculate the address of the class entry page.
        bss_entry_adrp_label_ = arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
        arm64_codegen->EmitAdrpPlaceholder(bss_entry_adrp_label_, bss_entry_temp_);
      }
      vixl::aarch64::Label* strp_label =
          arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label_);
      {
        SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
        __ Bind(strp_label);
        __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
               MemOperand(bss_entry_temp_, /* offset placeholder */ 0));
      }
    }
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The dex PC of `at_`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  // For HLoadClass/kBssEntry, the temp register and the label of the ADRP where it was loaded.
  vixl::aarch64::Register bss_entry_temp_;
  vixl::aarch64::Label* bss_entry_adrp_label_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
};

class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
      : SlowPathCodeARM64(instruction),
        temp_(temp),
        adrp_label_(adrp_label) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    InvokeRuntimeCallingConvention calling_convention;
    // Make sure `temp_` is not clobbered by the call or by saving/restoring registers.
    DCHECK(temp_.IsValid());
    DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0)));
    DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_));

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
    arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    Primitive::Type type = instruction_->GetType();
    arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    // Store the resolved String to the BSS entry.
    const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
    if (!kUseReadBarrier || kUseBakerReadBarrier) {
      // The string entry page address was preserved in temp_ thanks to kSaveEverything.
    } else {
      // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
      adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
      arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
    }
    vixl::aarch64::Label* strp_label =
        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
    {
      SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
      __ Bind(strp_label);
      __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
             MemOperand(temp_, /* offset placeholder */ 0));
    }

    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }

 private:
  const Register temp_;
  vixl::aarch64::Label* adrp_label_;

  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
};

class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
};

class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARM64(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
    arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm64_codegen->GetLabelOf(successor_));
    }
  }

  vixl::aarch64::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl::aarch64::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
};

class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();

    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    uint32_t dex_pc = instruction_->GetDexPc();

    __ Bind(GetEntryLabel());

    if (!is_fatal_) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               Primitive::kPrimNot,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               Primitive::kPrimNot);
    if (instruction_->IsInstanceOf()) {
      arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      Primitive::Type ret_type = instruction_->GetType();
      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
      arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};

class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};

class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        Primitive::kPrimNot,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
};

void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
  uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  // We are about to use the assembler to place literals directly. Make sure we have enough
  // underlying code buffer and we have generated a jump table of the right size.
  EmissionCheckScope scope(codegen->GetVIXLAssembler(),
                           num_entries * sizeof(int32_t),
                           CodeBufferCheckScope::kExactSize);

  __ Bind(&table_start_);
  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
  for (uint32_t i = 0; i < num_entries; i++) {
    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
    DCHECK(target_label->IsBound());
    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
    Literal<int32_t> literal(jump_offset);
    __ place(&literal);
  }
}

// Abstract base class for read barrier slow paths marking a reference
// `ref`.
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
 protected:
  ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
      : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }

  // Generate assembly code calling the read barrier marking runtime
  // entry point (ReadBarrierMarkRegX).
  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    DCHECK_NE(ref_.reg(), LR);
    DCHECK_NE(ref_.reg(), WSP);
    DCHECK_NE(ref_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary, so it cannot be the entry point's input/output.
    DCHECK_NE(ref_.reg(), IP0);
    DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in W0):
    //
    //   W0 <- ref
    //   W0 <- ReadBarrierMark(W0)
    //   ref <- W0
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    if (entrypoint_.IsValid()) {
      arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
      __ Blr(XRegisterFrom(entrypoint_));
    } else {
      // The entrypoint is not already loaded, so load it from the thread.
      int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
      // This runtime call does not require a stack map.
      arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    }
  }

  // The location (register) of the marked object reference.
  const Location ref_;

  // The location of the entrypoint if it is already loaded.
  const Location entrypoint_;

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
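// In practice, this slow path is only used for GC roots loaded by HLoadClass and HLoadString
// (see the DCHECK in EmitNativeCode below).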
class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
                               Location ref,
                               Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);
    __ B(GetExitLabel());
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). The field `obj.field` in the object `obj` holding
// this reference does not get updated by this slow path after marking
// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
// below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 uint32_t offset,
                                                 Location index,
                                                 size_t scale_factor,
                                                 bool needs_null_check,
                                                 bool use_load_acquire,
                                                 Register temp,
                                                 Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        use_load_acquire_(use_load_acquire),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(obj_.IsW());
    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP0 nor IP1, as we may use them to emit the reference
    // load (in the call to GenerateRawReferenceLoad below), and we
    // need the lock word to still be in `temp_` after the reference
    // load.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    DCHECK_NE(LocationFrom(temp_).reg(), IP1);

    __ Bind(GetEntryLabel());

    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
    // inserted after the original load. However, in fast path based
    // Baker's read barriers, we need to perform the load of
    // mirror::Object::monitor_ *before* the original reference load.
    // This load-load ordering is required by the read barrier.
    // The slow path (for Baker's algorithm) should look like:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //   }
    //
    // Note: the original implementation in ReadBarrier::Barrier is
    // slightly more complex as it performs additional checks that we do
    // not do here for performance reasons.

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including rb_state,
    // to prevent load-load reordering, and without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp`.
    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->GenerateRawReferenceLoad(instruction_,
                                            ref_,
                                            obj_,
                                            offset_,
                                            index_,
                                            scale_factor_,
                                            /* needs_null_check */ false,
                                            use_load_acquire_);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);

    __ B(GetExitLabel());
  }

 private:
  // The register containing the object holding the marked object reference field.
  Register obj_;
  // The offset, index and scale factor to access the reference in `obj_`.
  uint32_t offset_;
  Location index_;
  size_t scale_factor_;
  // Is a null check required?
  bool needs_null_check_;
  // Should this reference load use Load-Acquire semantics?
  bool use_load_acquire_;
  // A temporary register used to hold the lock word of `obj_`.
  Register temp_;

  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). If needed, this slow path also atomically updates
// the field `obj.field` in the object `obj` holding this reference
// after marking (contrary to
// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
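// In practice, this slow path is currently only used by the UnsafeCASObject intrinsic
// (see the DCHECKs in EmitNativeCode below).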
class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
    : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
      HInstruction* instruction,
      Location ref,
      Register obj,
      uint32_t offset,
      Location index,
      size_t scale_factor,
      bool needs_null_check,
      bool use_load_acquire,
      Register temp,
      Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        use_load_acquire_(use_load_acquire),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = WRegisterFrom(ref_);
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(obj_.IsW());
    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());

    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
    DCHECK_EQ(offset_, 0u);
    DCHECK_EQ(scale_factor_, 0u);
    DCHECK_EQ(use_load_acquire_, false);
    // The location of the offset of the marked reference field within `obj_`.
    Location field_offset = index_;
    DCHECK(field_offset.IsRegister()) << field_offset;

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP0 nor IP1, as we may use them to emit the reference
    // load (in the call to GenerateRawReferenceLoad below), and we
    // need the lock word to still be in `temp_` after the reference
    // load.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    DCHECK_NE(LocationFrom(temp_).reg(), IP1);

    __ Bind(GetEntryLabel());

    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     old_ref = ref;
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
    //   }

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including rb_state,
    // to prevent load-load reordering, and without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp`.
    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->GenerateRawReferenceLoad(instruction_,
                                            ref_,
                                            obj_,
                                            offset_,
                                            index_,
                                            scale_factor_,
                                            /* needs_null_check */ false,
                                            use_load_acquire_);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());

    // Save the old value of the reference before marking it.
    // Note that we cannot use IP to save the old reference, as IP is
    // used internally by the ReadBarrierMarkRegX entry point, and we
    // need the old reference after the call to that entry point.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    __ Mov(temp_.W(), ref_reg);

    GenerateReadBarrierMarkRuntimeCall(codegen);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*(obj_ + field_offset)`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LDXR/CMP/BNE sequence of instructions in the compare-and-set
    // (CAS) operation below would abort the CAS, leaving the field
    // as-is.
    __ Cmp(temp_.W(), ref_reg);
    __ B(eq, GetExitLabel());

    // Update the holder's field atomically. This may fail if the mutator
    // updates it before us, but that is OK. This is achieved using a
    // strong compare-and-set (CAS) operation with relaxed memory
    // synchronization ordering, where the expected value is the old
    // reference and the desired value is the new reference.

    MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
    UseScratchRegisterScope temps(masm);

    // Convenience aliases.
    Register base = obj_.W();
    Register offset = XRegisterFrom(field_offset);
    Register expected = temp_.W();
    Register value = ref_reg;
    Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
    Register tmp_value = temps.AcquireW();  // Value in memory.
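
    // Compute the raw address of the reference field: tmp_ptr = obj + field_offset.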
    __ Add(tmp_ptr, base.X(), Operand(offset));

    if (kPoisonHeapReferences) {
      arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
      if (value.Is(expected)) {
        // Do not poison `value`, as it is the same register as
        // `expected`, which has just been poisoned.
      } else {
        arm64_codegen->GetAssembler()->PoisonHeapReference(value);
      }
    }

    // do {
    //   tmp_value = [tmp_ptr] - expected;
    // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));

    vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
    __ Bind(&loop_head);
    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
    __ Cmp(tmp_value, expected);
    __ B(&comparison_failed, ne);
    __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
    __ Cbnz(tmp_value, &loop_head);
    __ B(&exit_loop);
    __ Bind(&comparison_failed);
    __ Clrex();
    __ Bind(&exit_loop);

    if (kPoisonHeapReferences) {
      arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
      if (value.Is(expected)) {
        // Do not unpoison `value`, as it is the same register as
        // `expected`, which has just been unpoisoned.
      } else {
        arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
      }
    }

    __ B(GetExitLabel());
  }

 private:
  // The register containing the object holding the marked object reference field.
  const Register obj_;
  // The offset, index and scale factor to access the reference in `obj_`.
  uint32_t offset_;
  Location index_;
  size_t scale_factor_;
  // Is a null check required?
  bool needs_null_check_;
  // Should this reference load use Load-Acquire semantics?
  bool use_load_acquire_;
  // A temporary register used to hold the lock word of `obj_`; and
  // also to hold the original reference value, when the reference is
  // marked.
  const Register temp_;

  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
                                           Location out,
                                           Location ref,
                                           Location obj,
                                           uint32_t offset,
                                           Location index)
      : SlowPathCodeARM64(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ Ldr(out, HeapOperand(out, class_offset);
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    Primitive::Type type = Primitive::kPrimNot;
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
          // We are about to change the value of `index_reg` (see the
          // calls to vixl::MacroAssembler::Lsl and
          // vixl::MacroAssembler::Mov below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg.W(), index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, Operand(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0u);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          type,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          type,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            Primitive::kPrimInt,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
    }
    arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
    size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return Register(VIXLRegCodeFromART(i), kXRegSize);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
1338 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); 1339 LOG(FATAL) << "Could not find a free register"; 1340 UNREACHABLE(); 1341 } 1342 1343 const Location out_; 1344 const Location ref_; 1345 const Location obj_; 1346 const uint32_t offset_; 1347 // An additional location containing an index to an array. 1348 // Only used for HArrayGet and the UnsafeGetObject & 1349 // UnsafeGetObjectVolatile intrinsics. 1350 const Location index_; 1351 1352 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64); 1353 }; 1354 1355 // Slow path generating a read barrier for a GC root. 1356 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { 1357 public: 1358 ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) 1359 : SlowPathCodeARM64(instruction), out_(out), root_(root) { 1360 DCHECK(kEmitCompilerReadBarrier); 1361 } 1362 1363 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 1364 LocationSummary* locations = instruction_->GetLocations(); 1365 Primitive::Type type = Primitive::kPrimNot; 1366 DCHECK(locations->CanCall()); 1367 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); 1368 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) 1369 << "Unexpected instruction in read barrier for GC root slow path: " 1370 << instruction_->DebugName(); 1371 1372 __ Bind(GetEntryLabel()); 1373 SaveLiveRegisters(codegen, locations); 1374 1375 InvokeRuntimeCallingConvention calling_convention; 1376 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 1377 // The argument of the ReadBarrierForRootSlow is not a managed 1378 // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`; 1379 // thus we need a 64-bit move here, and we cannot use 1380 // 1381 // arm64_codegen->MoveLocation( 1382 // LocationFrom(calling_convention.GetRegisterAt(0)), 1383 // root_, 1384 // type); 1385 // 1386 // which would emit a 32-bit move, as `type` is a (32-bit wide) 1387 // reference type (`Primitive::kPrimNot`). 
1388     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(root_));
1389     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
1390                                  instruction_,
1391                                  instruction_->GetDexPc(),
1392                                  this);
1393     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1394     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1395 
1396     RestoreLiveRegisters(codegen, locations);
1397     __ B(GetExitLabel());
1398   }
1399 
1400   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
1401 
1402  private:
1403   const Location out_;
1404   const Location root_;
1405 
1406   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
1407 };
1408 
1409 #undef __
1410 
1411 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
1412   Location next_location;
1413   if (type == Primitive::kPrimVoid) {
1414     LOG(FATAL) << "Unreachable type " << type;
1415   }
1416 
1417   if (Primitive::IsFloatingPointType(type) &&
1418       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
1419     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
1420   } else if (!Primitive::IsFloatingPointType(type) &&
1421              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
1422     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
1423   } else {
1424     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
1425     next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
1426                                                  : Location::StackSlot(stack_offset);
1427   }
1428 
1429   // Space on the stack is reserved for all arguments.
1430   stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
1431   return next_location;
1432 }
1433 
1434 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
1435   return LocationFrom(kArtMethodRegister);
1436 }
1437 
1438 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
1439                                        const Arm64InstructionSetFeatures& isa_features,
1440                                        const CompilerOptions& compiler_options,
1441                                        OptimizingCompilerStats* stats)
1442     : CodeGenerator(graph,
1443                     kNumberOfAllocatableRegisters,
1444                     kNumberOfAllocatableFPRegisters,
1445                     kNumberOfAllocatableRegisterPairs,
1446                     callee_saved_core_registers.GetList(),
1447                     callee_saved_fp_registers.GetList(),
1448                     compiler_options,
1449                     stats),
1450       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1451       jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1452       location_builder_(graph, this),
1453       instruction_visitor_(graph, this),
1454       move_resolver_(graph->GetArena(), this),
1455       assembler_(graph->GetArena()),
1456       isa_features_(isa_features),
1457       uint32_literals_(std::less<uint32_t>(),
1458                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1459       uint64_literals_(std::less<uint64_t>(),
1460                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1461       pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1462       method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1463       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1464       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1465       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1466       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1467       jit_string_patches_(StringReferenceValueComparator(),
1468
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), 1469 jit_class_patches_(TypeReferenceValueComparator(), 1470 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { 1471 // Save the link register (containing the return address) to mimic Quick. 1472 AddAllocatedRegister(LocationFrom(lr)); 1473 } 1474 1475 #define __ GetVIXLAssembler()-> 1476 1477 void CodeGeneratorARM64::EmitJumpTables() { 1478 for (auto&& jump_table : jump_tables_) { 1479 jump_table->EmitTable(this); 1480 } 1481 } 1482 1483 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { 1484 EmitJumpTables(); 1485 // Ensure we emit the literal pool. 1486 __ FinalizeCode(); 1487 1488 CodeGenerator::Finalize(allocator); 1489 } 1490 1491 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { 1492 // Note: There are 6 kinds of moves: 1493 // 1. constant -> GPR/FPR (non-cycle) 1494 // 2. constant -> stack (non-cycle) 1495 // 3. GPR/FPR -> GPR/FPR 1496 // 4. GPR/FPR -> stack 1497 // 5. stack -> GPR/FPR 1498 // 6. stack -> stack (non-cycle) 1499 // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5 1500 // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting 1501 // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the 1502 // dependency. 1503 vixl_temps_.Open(GetVIXLAssembler()); 1504 } 1505 1506 void ParallelMoveResolverARM64::FinishEmitNativeCode() { 1507 vixl_temps_.Close(); 1508 } 1509 1510 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) { 1511 DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister 1512 || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot 1513 || kind == Location::kSIMDStackSlot); 1514 kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot) 1515 ? Location::kFpuRegister 1516 : Location::kRegister; 1517 Location scratch = GetScratchLocation(kind); 1518 if (!scratch.Equals(Location::NoLocation())) { 1519 return scratch; 1520 } 1521 // Allocate from VIXL temp registers. 1522 if (kind == Location::kRegister) { 1523 scratch = LocationFrom(vixl_temps_.AcquireX()); 1524 } else { 1525 DCHECK_EQ(kind, Location::kFpuRegister); 1526 scratch = LocationFrom(codegen_->GetGraph()->HasSIMD() 1527 ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize) 1528 : vixl_temps_.AcquireD()); 1529 } 1530 AddScratchLocation(scratch); 1531 return scratch; 1532 } 1533 1534 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { 1535 if (loc.IsRegister()) { 1536 vixl_temps_.Release(XRegisterFrom(loc)); 1537 } else { 1538 DCHECK(loc.IsFpuRegister()); 1539 vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? 
QRegisterFrom(loc) : DRegisterFrom(loc)); 1540 } 1541 RemoveScratchLocation(loc); 1542 } 1543 1544 void ParallelMoveResolverARM64::EmitMove(size_t index) { 1545 MoveOperands* move = moves_[index]; 1546 codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid); 1547 } 1548 1549 void CodeGeneratorARM64::GenerateFrameEntry() { 1550 MacroAssembler* masm = GetVIXLAssembler(); 1551 __ Bind(&frame_entry_label_); 1552 1553 bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod(); 1554 if (do_overflow_check) { 1555 UseScratchRegisterScope temps(masm); 1556 Register temp = temps.AcquireX(); 1557 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); 1558 __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); 1559 { 1560 // Ensure that between load and RecordPcInfo there are no pools emitted. 1561 ExactAssemblyScope eas(GetVIXLAssembler(), 1562 kInstructionSize, 1563 CodeBufferCheckScope::kExactSize); 1564 __ ldr(wzr, MemOperand(temp, 0)); 1565 RecordPcInfo(nullptr, 0); 1566 } 1567 } 1568 1569 if (!HasEmptyFrame()) { 1570 int frame_size = GetFrameSize(); 1571 // Stack layout: 1572 // sp[frame_size - 8] : lr. 1573 // ... : other preserved core registers. 1574 // ... : other preserved fp registers. 1575 // ... : reserved frame space. 1576 // sp[0] : current method. 1577 1578 // Save the current method if we need it. Note that we do not 1579 // do this in HCurrentMethod, as the instruction might have been removed 1580 // in the SSA graph. 1581 if (RequiresCurrentMethod()) { 1582 __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); 1583 } else { 1584 __ Claim(frame_size); 1585 } 1586 GetAssembler()->cfi().AdjustCFAOffset(frame_size); 1587 GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(), 1588 frame_size - GetCoreSpillSize()); 1589 GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(), 1590 frame_size - FrameEntrySpillSize()); 1591 1592 if (GetGraph()->HasShouldDeoptimizeFlag()) { 1593 // Initialize should_deoptimize flag to 0. 
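// Note: storing the zero register avoids materializing a zero constant in a scratch register.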
1594 Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize); 1595 __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); 1596 } 1597 } 1598 } 1599 1600 void CodeGeneratorARM64::GenerateFrameExit() { 1601 GetAssembler()->cfi().RememberState(); 1602 if (!HasEmptyFrame()) { 1603 int frame_size = GetFrameSize(); 1604 GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(), 1605 frame_size - FrameEntrySpillSize()); 1606 GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(), 1607 frame_size - GetCoreSpillSize()); 1608 __ Drop(frame_size); 1609 GetAssembler()->cfi().AdjustCFAOffset(-frame_size); 1610 } 1611 __ Ret(); 1612 GetAssembler()->cfi().RestoreState(); 1613 GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); 1614 } 1615 1616 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const { 1617 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0)); 1618 return CPURegList(CPURegister::kRegister, kXRegSize, 1619 core_spill_mask_); 1620 } 1621 1622 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const { 1623 DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_, 1624 GetNumberOfFloatingPointRegisters())); 1625 return CPURegList(CPURegister::kFPRegister, kDRegSize, 1626 fpu_spill_mask_); 1627 } 1628 1629 void CodeGeneratorARM64::Bind(HBasicBlock* block) { 1630 __ Bind(GetLabelOf(block)); 1631 } 1632 1633 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) { 1634 DCHECK(location.IsRegister()); 1635 __ Mov(RegisterFrom(location, Primitive::kPrimInt), value); 1636 } 1637 1638 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) { 1639 if (location.IsRegister()) { 1640 locations->AddTemp(location); 1641 } else { 1642 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; 1643 } 1644 } 1645 1646 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) { 1647 UseScratchRegisterScope temps(GetVIXLAssembler()); 1648 Register card = temps.AcquireX(); 1649 Register temp = temps.AcquireW(); // Index within the CardTable - 32bit. 1650 vixl::aarch64::Label done; 1651 if (value_can_be_null) { 1652 __ Cbz(value, &done); 1653 } 1654 __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value())); 1655 __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); 1656 __ Strb(card, MemOperand(card, temp.X())); 1657 if (value_can_be_null) { 1658 __ Bind(&done); 1659 } 1660 } 1661 1662 void CodeGeneratorARM64::SetupBlockedRegisters() const { 1663 // Blocked core registers: 1664 // lr : Runtime reserved. 1665 // tr : Runtime reserved. 1666 // mr : Runtime reserved. 1667 // ip1 : VIXL core temp. 1668 // ip0 : VIXL core temp. 1669 // 1670 // Blocked fp registers: 1671 // d31 : VIXL fp temp. 1672 CPURegList reserved_core_registers = vixl_reserved_core_registers; 1673 reserved_core_registers.Combine(runtime_reserved_core_registers); 1674 while (!reserved_core_registers.IsEmpty()) { 1675 blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true; 1676 } 1677 1678 CPURegList reserved_fp_registers = vixl_reserved_fp_registers; 1679 while (!reserved_fp_registers.IsEmpty()) { 1680 blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true; 1681 } 1682 1683 if (GetGraph()->IsDebuggable()) { 1684 // Stubs do not save callee-save floating point registers. 
If the graph 1685 // is debuggable, we need to deal with these registers differently. For 1686 // now, just block them. 1687 CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers; 1688 while (!reserved_fp_registers_debuggable.IsEmpty()) { 1689 blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true; 1690 } 1691 } 1692 } 1693 1694 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { 1695 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); 1696 __ Str(reg, MemOperand(sp, stack_index)); 1697 return kArm64WordSize; 1698 } 1699 1700 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { 1701 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); 1702 __ Ldr(reg, MemOperand(sp, stack_index)); 1703 return kArm64WordSize; 1704 } 1705 1706 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 1707 FPRegister reg = FPRegister(reg_id, kDRegSize); 1708 __ Str(reg, MemOperand(sp, stack_index)); 1709 return kArm64WordSize; 1710 } 1711 1712 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 1713 FPRegister reg = FPRegister(reg_id, kDRegSize); 1714 __ Ldr(reg, MemOperand(sp, stack_index)); 1715 return kArm64WordSize; 1716 } 1717 1718 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const { 1719 stream << XRegister(reg); 1720 } 1721 1722 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { 1723 stream << DRegister(reg); 1724 } 1725 1726 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { 1727 if (constant->IsIntConstant()) { 1728 __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); 1729 } else if (constant->IsLongConstant()) { 1730 __ Mov(Register(destination), constant->AsLongConstant()->GetValue()); 1731 } else if (constant->IsNullConstant()) { 1732 __ Mov(Register(destination), 0); 1733 } else if (constant->IsFloatConstant()) { 1734 __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue()); 1735 } else { 1736 DCHECK(constant->IsDoubleConstant()); 1737 __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue()); 1738 } 1739 } 1740 1741 1742 static bool CoherentConstantAndType(Location constant, Primitive::Type type) { 1743 DCHECK(constant.IsConstant()); 1744 HConstant* cst = constant.GetConstant(); 1745 return (cst->IsIntConstant() && type == Primitive::kPrimInt) || 1746 // Null is mapped to a core W register, which we associate with kPrimInt. 1747 (cst->IsNullConstant() && type == Primitive::kPrimInt) || 1748 (cst->IsLongConstant() && type == Primitive::kPrimLong) || 1749 (cst->IsFloatConstant() && type == Primitive::kPrimFloat) || 1750 (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); 1751 } 1752 1753 // Allocate a scratch register from the VIXL pool, querying first 1754 // the floating-point register pool, and then the core register 1755 // pool. This is essentially a reimplementation of 1756 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize 1757 // using a different allocation strategy. 1758 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm, 1759 vixl::aarch64::UseScratchRegisterScope* temps, 1760 int size_in_bits) { 1761 return masm->GetScratchFPRegisterList()->IsEmpty() 1762 ? 
CPURegister(temps->AcquireRegisterOfSize(size_in_bits)) 1763 : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits)); 1764 } 1765 1766 void CodeGeneratorARM64::MoveLocation(Location destination, 1767 Location source, 1768 Primitive::Type dst_type) { 1769 if (source.Equals(destination)) { 1770 return; 1771 } 1772 1773 // A valid move can always be inferred from the destination and source 1774 // locations. When moving from and to a register, the argument type can be 1775 // used to generate 32bit instead of 64bit moves. In debug mode we also 1776 // checks the coherency of the locations and the type. 1777 bool unspecified_type = (dst_type == Primitive::kPrimVoid); 1778 1779 if (destination.IsRegister() || destination.IsFpuRegister()) { 1780 if (unspecified_type) { 1781 HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; 1782 if (source.IsStackSlot() || 1783 (src_cst != nullptr && (src_cst->IsIntConstant() 1784 || src_cst->IsFloatConstant() 1785 || src_cst->IsNullConstant()))) { 1786 // For stack slots and 32bit constants, a 64bit type is appropriate. 1787 dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; 1788 } else { 1789 // If the source is a double stack slot or a 64bit constant, a 64bit 1790 // type is appropriate. Else the source is a register, and since the 1791 // type has not been specified, we chose a 64bit type to force a 64bit 1792 // move. 1793 dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble; 1794 } 1795 } 1796 DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) || 1797 (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type))); 1798 CPURegister dst = CPURegisterFrom(destination, dst_type); 1799 if (source.IsStackSlot() || source.IsDoubleStackSlot()) { 1800 DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot()); 1801 __ Ldr(dst, StackOperandFrom(source)); 1802 } else if (source.IsSIMDStackSlot()) { 1803 __ Ldr(QRegisterFrom(destination), StackOperandFrom(source)); 1804 } else if (source.IsConstant()) { 1805 DCHECK(CoherentConstantAndType(source, dst_type)); 1806 MoveConstant(dst, source.GetConstant()); 1807 } else if (source.IsRegister()) { 1808 if (destination.IsRegister()) { 1809 __ Mov(Register(dst), RegisterFrom(source, dst_type)); 1810 } else { 1811 DCHECK(destination.IsFpuRegister()); 1812 Primitive::Type source_type = Primitive::Is64BitType(dst_type) 1813 ? Primitive::kPrimLong 1814 : Primitive::kPrimInt; 1815 __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type)); 1816 } 1817 } else { 1818 DCHECK(source.IsFpuRegister()); 1819 if (destination.IsRegister()) { 1820 Primitive::Type source_type = Primitive::Is64BitType(dst_type) 1821 ? 
Primitive::kPrimDouble 1822 : Primitive::kPrimFloat; 1823 __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type)); 1824 } else { 1825 DCHECK(destination.IsFpuRegister()); 1826 if (GetGraph()->HasSIMD()) { 1827 __ Mov(QRegisterFrom(destination), QRegisterFrom(source)); 1828 } else { 1829 __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type)); 1830 } 1831 } 1832 } 1833 } else if (destination.IsSIMDStackSlot()) { 1834 if (source.IsFpuRegister()) { 1835 __ Str(QRegisterFrom(source), StackOperandFrom(destination)); 1836 } else { 1837 DCHECK(source.IsSIMDStackSlot()); 1838 UseScratchRegisterScope temps(GetVIXLAssembler()); 1839 if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) { 1840 Register temp = temps.AcquireX(); 1841 __ Ldr(temp, MemOperand(sp, source.GetStackIndex())); 1842 __ Str(temp, MemOperand(sp, destination.GetStackIndex())); 1843 __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize)); 1844 __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize)); 1845 } else { 1846 FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize); 1847 __ Ldr(temp, StackOperandFrom(source)); 1848 __ Str(temp, StackOperandFrom(destination)); 1849 } 1850 } 1851 } else { // The destination is not a register. It must be a stack slot. 1852 DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); 1853 if (source.IsRegister() || source.IsFpuRegister()) { 1854 if (unspecified_type) { 1855 if (source.IsRegister()) { 1856 dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; 1857 } else { 1858 dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble; 1859 } 1860 } 1861 DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) && 1862 (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type))); 1863 __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination)); 1864 } else if (source.IsConstant()) { 1865 DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type)) 1866 << source << " " << dst_type; 1867 UseScratchRegisterScope temps(GetVIXLAssembler()); 1868 HConstant* src_cst = source.GetConstant(); 1869 CPURegister temp; 1870 if (src_cst->IsZeroBitPattern()) { 1871 temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) 1872 ? Register(xzr) 1873 : Register(wzr); 1874 } else { 1875 if (src_cst->IsIntConstant()) { 1876 temp = temps.AcquireW(); 1877 } else if (src_cst->IsLongConstant()) { 1878 temp = temps.AcquireX(); 1879 } else if (src_cst->IsFloatConstant()) { 1880 temp = temps.AcquireS(); 1881 } else { 1882 DCHECK(src_cst->IsDoubleConstant()); 1883 temp = temps.AcquireD(); 1884 } 1885 MoveConstant(temp, src_cst); 1886 } 1887 __ Str(temp, StackOperandFrom(destination)); 1888 } else { 1889 DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); 1890 DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot()); 1891 UseScratchRegisterScope temps(GetVIXLAssembler()); 1892 // Use any scratch register (a core or a floating-point one) 1893 // from VIXL scratch register pools as a temporary. 1894 // 1895 // We used to only use the FP scratch register pool, but in some 1896 // rare cases the only register from this pool (D31) would 1897 // already be used (e.g. within a ParallelMove instruction, when 1898 // a move is blocked by a another move requiring a scratch FP 1899 // register, which would reserve D31). To prevent this issue, we 1900 // ask for a scratch register of any type (core or FP). 
1901 // 1902 // Also, we start by asking for a FP scratch register first, as the 1903 // demand of scratch core registers is higher. This is why we 1904 // use AcquireFPOrCoreCPURegisterOfSize instead of 1905 // UseScratchRegisterScope::AcquireCPURegisterOfSize, which 1906 // allocates core scratch registers first. 1907 CPURegister temp = AcquireFPOrCoreCPURegisterOfSize( 1908 GetVIXLAssembler(), 1909 &temps, 1910 (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize)); 1911 __ Ldr(temp, StackOperandFrom(source)); 1912 __ Str(temp, StackOperandFrom(destination)); 1913 } 1914 } 1915 } 1916 1917 void CodeGeneratorARM64::Load(Primitive::Type type, 1918 CPURegister dst, 1919 const MemOperand& src) { 1920 switch (type) { 1921 case Primitive::kPrimBoolean: 1922 __ Ldrb(Register(dst), src); 1923 break; 1924 case Primitive::kPrimByte: 1925 __ Ldrsb(Register(dst), src); 1926 break; 1927 case Primitive::kPrimShort: 1928 __ Ldrsh(Register(dst), src); 1929 break; 1930 case Primitive::kPrimChar: 1931 __ Ldrh(Register(dst), src); 1932 break; 1933 case Primitive::kPrimInt: 1934 case Primitive::kPrimNot: 1935 case Primitive::kPrimLong: 1936 case Primitive::kPrimFloat: 1937 case Primitive::kPrimDouble: 1938 DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); 1939 __ Ldr(dst, src); 1940 break; 1941 case Primitive::kPrimVoid: 1942 LOG(FATAL) << "Unreachable type " << type; 1943 } 1944 } 1945 1946 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, 1947 CPURegister dst, 1948 const MemOperand& src, 1949 bool needs_null_check) { 1950 MacroAssembler* masm = GetVIXLAssembler(); 1951 UseScratchRegisterScope temps(masm); 1952 Register temp_base = temps.AcquireX(); 1953 Primitive::Type type = instruction->GetType(); 1954 1955 DCHECK(!src.IsPreIndex()); 1956 DCHECK(!src.IsPostIndex()); 1957 1958 // TODO(vixl): Let the MacroAssembler handle MemOperand. 1959 __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src)); 1960 { 1961 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
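// Note: the ExactAssemblyScope around each load-acquire below guarantees that no literal pool is
// emitted between the load and MaybeRecordImplicitNullCheck, keeping the recorded PC associated
// with the potentially faulting instruction.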
1962 MemOperand base = MemOperand(temp_base); 1963 switch (type) { 1964 case Primitive::kPrimBoolean: 1965 { 1966 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1967 __ ldarb(Register(dst), base); 1968 if (needs_null_check) { 1969 MaybeRecordImplicitNullCheck(instruction); 1970 } 1971 } 1972 break; 1973 case Primitive::kPrimByte: 1974 { 1975 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1976 __ ldarb(Register(dst), base); 1977 if (needs_null_check) { 1978 MaybeRecordImplicitNullCheck(instruction); 1979 } 1980 } 1981 __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); 1982 break; 1983 case Primitive::kPrimChar: 1984 { 1985 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1986 __ ldarh(Register(dst), base); 1987 if (needs_null_check) { 1988 MaybeRecordImplicitNullCheck(instruction); 1989 } 1990 } 1991 break; 1992 case Primitive::kPrimShort: 1993 { 1994 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1995 __ ldarh(Register(dst), base); 1996 if (needs_null_check) { 1997 MaybeRecordImplicitNullCheck(instruction); 1998 } 1999 } 2000 __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); 2001 break; 2002 case Primitive::kPrimInt: 2003 case Primitive::kPrimNot: 2004 case Primitive::kPrimLong: 2005 DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); 2006 { 2007 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2008 __ ldar(Register(dst), base); 2009 if (needs_null_check) { 2010 MaybeRecordImplicitNullCheck(instruction); 2011 } 2012 } 2013 break; 2014 case Primitive::kPrimFloat: 2015 case Primitive::kPrimDouble: { 2016 DCHECK(dst.IsFPRegister()); 2017 DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); 2018 2019 Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); 2020 { 2021 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2022 __ ldar(temp, base); 2023 if (needs_null_check) { 2024 MaybeRecordImplicitNullCheck(instruction); 2025 } 2026 } 2027 __ Fmov(FPRegister(dst), temp); 2028 break; 2029 } 2030 case Primitive::kPrimVoid: 2031 LOG(FATAL) << "Unreachable type " << type; 2032 } 2033 } 2034 } 2035 2036 void CodeGeneratorARM64::Store(Primitive::Type type, 2037 CPURegister src, 2038 const MemOperand& dst) { 2039 switch (type) { 2040 case Primitive::kPrimBoolean: 2041 case Primitive::kPrimByte: 2042 __ Strb(Register(src), dst); 2043 break; 2044 case Primitive::kPrimChar: 2045 case Primitive::kPrimShort: 2046 __ Strh(Register(src), dst); 2047 break; 2048 case Primitive::kPrimInt: 2049 case Primitive::kPrimNot: 2050 case Primitive::kPrimLong: 2051 case Primitive::kPrimFloat: 2052 case Primitive::kPrimDouble: 2053 DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type)); 2054 __ Str(src, dst); 2055 break; 2056 case Primitive::kPrimVoid: 2057 LOG(FATAL) << "Unreachable type " << type; 2058 } 2059 } 2060 2061 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, 2062 Primitive::Type type, 2063 CPURegister src, 2064 const MemOperand& dst, 2065 bool needs_null_check) { 2066 MacroAssembler* masm = GetVIXLAssembler(); 2067 UseScratchRegisterScope temps(GetVIXLAssembler()); 2068 Register temp_base = temps.AcquireX(); 2069 2070 DCHECK(!dst.IsPreIndex()); 2071 DCHECK(!dst.IsPostIndex()); 2072 2073 // TODO(vixl): Let the MacroAssembler handle this. 
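// Note: the store-release instructions used below (STLR, STLRB, STLRH) only accept a plain base
// register with no offset, so the full destination address is computed into `temp_base` first.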
2074 Operand op = OperandFromMemOperand(dst); 2075 __ Add(temp_base, dst.GetBaseRegister(), op); 2076 MemOperand base = MemOperand(temp_base); 2077 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2078 switch (type) { 2079 case Primitive::kPrimBoolean: 2080 case Primitive::kPrimByte: 2081 { 2082 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2083 __ stlrb(Register(src), base); 2084 if (needs_null_check) { 2085 MaybeRecordImplicitNullCheck(instruction); 2086 } 2087 } 2088 break; 2089 case Primitive::kPrimChar: 2090 case Primitive::kPrimShort: 2091 { 2092 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2093 __ stlrh(Register(src), base); 2094 if (needs_null_check) { 2095 MaybeRecordImplicitNullCheck(instruction); 2096 } 2097 } 2098 break; 2099 case Primitive::kPrimInt: 2100 case Primitive::kPrimNot: 2101 case Primitive::kPrimLong: 2102 DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type)); 2103 { 2104 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2105 __ stlr(Register(src), base); 2106 if (needs_null_check) { 2107 MaybeRecordImplicitNullCheck(instruction); 2108 } 2109 } 2110 break; 2111 case Primitive::kPrimFloat: 2112 case Primitive::kPrimDouble: { 2113 DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type)); 2114 Register temp_src; 2115 if (src.IsZero()) { 2116 // The zero register is used to avoid synthesizing zero constants. 2117 temp_src = Register(src); 2118 } else { 2119 DCHECK(src.IsFPRegister()); 2120 temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); 2121 __ Fmov(temp_src, FPRegister(src)); 2122 } 2123 { 2124 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2125 __ stlr(temp_src, base); 2126 if (needs_null_check) { 2127 MaybeRecordImplicitNullCheck(instruction); 2128 } 2129 } 2130 break; 2131 } 2132 case Primitive::kPrimVoid: 2133 LOG(FATAL) << "Unreachable type " << type; 2134 } 2135 } 2136 2137 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint, 2138 HInstruction* instruction, 2139 uint32_t dex_pc, 2140 SlowPathCode* slow_path) { 2141 ValidateInvokeRuntime(entrypoint, instruction, slow_path); 2142 2143 __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value())); 2144 { 2145 // Ensure the pc position is recorded immediately after the `blr` instruction. 2146 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 2147 __ blr(lr); 2148 if (EntrypointRequiresStackMap(entrypoint)) { 2149 RecordPcInfo(instruction, dex_pc, slow_path); 2150 } 2151 } 2152 } 2153 2154 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, 2155 HInstruction* instruction, 2156 SlowPathCode* slow_path) { 2157 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); 2158 __ Ldr(lr, MemOperand(tr, entry_point_offset)); 2159 __ Blr(lr); 2160 } 2161 2162 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, 2163 Register class_reg) { 2164 UseScratchRegisterScope temps(GetVIXLAssembler()); 2165 Register temp = temps.AcquireW(); 2166 size_t status_offset = mirror::Class::StatusOffset().SizeValue(); 2167 2168 // Even if the initialized flag is set, we need to ensure consistent memory ordering. 2169 // TODO(vixl): Let the MacroAssembler handle MemOperand. 
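// Note: the sequence below load-acquires the class status word and branches to the slow path
// unless the status is at least kStatusInitialized; the acquire ordering ensures that fields
// written by the class initializer are visible before the class is used.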
2170 __ Add(temp, class_reg, status_offset); 2171 __ Ldar(temp, HeapOperand(temp)); 2172 __ Cmp(temp, mirror::Class::kStatusInitialized); 2173 __ B(lt, slow_path->GetEntryLabel()); 2174 __ Bind(slow_path->GetExitLabel()); 2175 } 2176 2177 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { 2178 BarrierType type = BarrierAll; 2179 2180 switch (kind) { 2181 case MemBarrierKind::kAnyAny: 2182 case MemBarrierKind::kAnyStore: { 2183 type = BarrierAll; 2184 break; 2185 } 2186 case MemBarrierKind::kLoadAny: { 2187 type = BarrierReads; 2188 break; 2189 } 2190 case MemBarrierKind::kStoreStore: { 2191 type = BarrierWrites; 2192 break; 2193 } 2194 default: 2195 LOG(FATAL) << "Unexpected memory barrier " << kind; 2196 } 2197 __ Dmb(InnerShareable, type); 2198 } 2199 2200 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, 2201 HBasicBlock* successor) { 2202 SuspendCheckSlowPathARM64* slow_path = 2203 down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); 2204 if (slow_path == nullptr) { 2205 slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); 2206 instruction->SetSlowPath(slow_path); 2207 codegen_->AddSlowPath(slow_path); 2208 if (successor != nullptr) { 2209 DCHECK(successor->IsLoopHeader()); 2210 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); 2211 } 2212 } else { 2213 DCHECK_EQ(slow_path->GetSuccessor(), successor); 2214 } 2215 2216 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); 2217 Register temp = temps.AcquireW(); 2218 2219 __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue())); 2220 if (successor == nullptr) { 2221 __ Cbnz(temp, slow_path->GetEntryLabel()); 2222 __ Bind(slow_path->GetReturnLabel()); 2223 } else { 2224 __ Cbz(temp, codegen_->GetLabelOf(successor)); 2225 __ B(slow_path->GetEntryLabel()); 2226 // slow_path will return to GetLabelOf(successor). 2227 } 2228 } 2229 2230 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, 2231 CodeGeneratorARM64* codegen) 2232 : InstructionCodeGenerator(graph, codegen), 2233 assembler_(codegen->GetAssembler()), 2234 codegen_(codegen) {} 2235 2236 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ 2237 /* No unimplemented IR. */ 2238 2239 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode 2240 2241 enum UnimplementedInstructionBreakCode { 2242 // Using a base helps identify when we hit such breakpoints. 
2243 UnimplementedInstructionBreakCodeBaseCode = 0x900, 2244 #define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name), 2245 FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION) 2246 #undef ENUM_UNIMPLEMENTED_INSTRUCTION 2247 }; 2248 2249 #define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name) \ 2250 void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) { \ 2251 __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name)); \ 2252 } \ 2253 void LocationsBuilderARM64::Visit##name(H##name* instr) { \ 2254 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \ 2255 locations->SetOut(Location::Any()); \ 2256 } 2257 FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS) 2258 #undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS 2259 2260 #undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE 2261 #undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION 2262 2263 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { 2264 DCHECK_EQ(instr->InputCount(), 2U); 2265 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); 2266 Primitive::Type type = instr->GetResultType(); 2267 switch (type) { 2268 case Primitive::kPrimInt: 2269 case Primitive::kPrimLong: 2270 locations->SetInAt(0, Location::RequiresRegister()); 2271 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr)); 2272 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2273 break; 2274 2275 case Primitive::kPrimFloat: 2276 case Primitive::kPrimDouble: 2277 locations->SetInAt(0, Location::RequiresFpuRegister()); 2278 locations->SetInAt(1, Location::RequiresFpuRegister()); 2279 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2280 break; 2281 2282 default: 2283 LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type; 2284 } 2285 } 2286 2287 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, 2288 const FieldInfo& field_info) { 2289 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 2290 2291 bool object_field_get_with_read_barrier = 2292 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); 2293 LocationSummary* locations = 2294 new (GetGraph()->GetArena()) LocationSummary(instruction, 2295 object_field_get_with_read_barrier ? 2296 LocationSummary::kCallOnSlowPath : 2297 LocationSummary::kNoCall); 2298 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 2299 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 2300 // We need a temporary register for the read barrier marking slow 2301 // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier. 2302 if (kBakerReadBarrierLinkTimeThunksEnableForFields && 2303 !Runtime::Current()->UseJitCompilation() && 2304 !field_info.IsVolatile()) { 2305 // If link-time thunks for the Baker read barrier are enabled, for AOT 2306 // non-volatile loads we need a temporary only if the offset is too big. 
2307 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { 2308 locations->AddTemp(FixedTempLocation()); 2309 } 2310 } else { 2311 locations->AddTemp(Location::RequiresRegister()); 2312 } 2313 } 2314 locations->SetInAt(0, Location::RequiresRegister()); 2315 if (Primitive::IsFloatingPointType(instruction->GetType())) { 2316 locations->SetOut(Location::RequiresFpuRegister()); 2317 } else { 2318 // The output overlaps for an object field get when read barriers 2319 // are enabled: we do not want the load to overwrite the object's 2320 // location, as we need it to emit the read barrier. 2321 locations->SetOut( 2322 Location::RequiresRegister(), 2323 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); 2324 } 2325 } 2326 2327 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, 2328 const FieldInfo& field_info) { 2329 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 2330 LocationSummary* locations = instruction->GetLocations(); 2331 Location base_loc = locations->InAt(0); 2332 Location out = locations->Out(); 2333 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 2334 Primitive::Type field_type = field_info.GetFieldType(); 2335 MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); 2336 2337 if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2338 // Object FieldGet with Baker's read barrier case. 2339 // /* HeapReference<Object> */ out = *(base + offset) 2340 Register base = RegisterFrom(base_loc, Primitive::kPrimNot); 2341 Location maybe_temp = 2342 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); 2343 // Note that potential implicit null checks are handled in this 2344 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call. 2345 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2346 instruction, 2347 out, 2348 base, 2349 offset, 2350 maybe_temp, 2351 /* needs_null_check */ true, 2352 field_info.IsVolatile()); 2353 } else { 2354 // General case. 2355 if (field_info.IsVolatile()) { 2356 // Note that a potential implicit null check is handled in this 2357 // CodeGeneratorARM64::LoadAcquire call. 2358 // NB: LoadAcquire will record the pc info if needed. 2359 codegen_->LoadAcquire( 2360 instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true); 2361 } else { 2362 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2363 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2364 codegen_->Load(field_type, OutputCPURegister(instruction), field); 2365 codegen_->MaybeRecordImplicitNullCheck(instruction); 2366 } 2367 if (field_type == Primitive::kPrimNot) { 2368 // If read barriers are enabled, emit read barriers other than 2369 // Baker's using a slow path (and also unpoison the loaded 2370 // reference, if heap poisoning is enabled). 
2371 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); 2372 } 2373 } 2374 } 2375 2376 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { 2377 LocationSummary* locations = 2378 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); 2379 locations->SetInAt(0, Location::RequiresRegister()); 2380 if (IsConstantZeroBitPattern(instruction->InputAt(1))) { 2381 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 2382 } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { 2383 locations->SetInAt(1, Location::RequiresFpuRegister()); 2384 } else { 2385 locations->SetInAt(1, Location::RequiresRegister()); 2386 } 2387 } 2388 2389 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, 2390 const FieldInfo& field_info, 2391 bool value_can_be_null) { 2392 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 2393 2394 Register obj = InputRegisterAt(instruction, 0); 2395 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1); 2396 CPURegister source = value; 2397 Offset offset = field_info.GetFieldOffset(); 2398 Primitive::Type field_type = field_info.GetFieldType(); 2399 2400 { 2401 // We use a block to end the scratch scope before the write barrier, thus 2402 // freeing the temporary registers so they can be used in `MarkGCCard`. 2403 UseScratchRegisterScope temps(GetVIXLAssembler()); 2404 2405 if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) { 2406 DCHECK(value.IsW()); 2407 Register temp = temps.AcquireW(); 2408 __ Mov(temp, value.W()); 2409 GetAssembler()->PoisonHeapReference(temp.W()); 2410 source = temp; 2411 } 2412 2413 if (field_info.IsVolatile()) { 2414 codegen_->StoreRelease( 2415 instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true); 2416 } else { 2417 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2418 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2419 codegen_->Store(field_type, source, HeapOperand(obj, offset)); 2420 codegen_->MaybeRecordImplicitNullCheck(instruction); 2421 } 2422 } 2423 2424 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 2425 codegen_->MarkGCCard(obj, Register(value), value_can_be_null); 2426 } 2427 } 2428 2429 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { 2430 Primitive::Type type = instr->GetType(); 2431 2432 switch (type) { 2433 case Primitive::kPrimInt: 2434 case Primitive::kPrimLong: { 2435 Register dst = OutputRegister(instr); 2436 Register lhs = InputRegisterAt(instr, 0); 2437 Operand rhs = InputOperandAt(instr, 1); 2438 if (instr->IsAdd()) { 2439 __ Add(dst, lhs, rhs); 2440 } else if (instr->IsAnd()) { 2441 __ And(dst, lhs, rhs); 2442 } else if (instr->IsOr()) { 2443 __ Orr(dst, lhs, rhs); 2444 } else if (instr->IsSub()) { 2445 __ Sub(dst, lhs, rhs); 2446 } else if (instr->IsRor()) { 2447 if (rhs.IsImmediate()) { 2448 uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1); 2449 __ Ror(dst, lhs, shift); 2450 } else { 2451 // Ensure shift distance is in the same size register as the result. If 2452 // we are rotating a long and the shift comes in a w register originally, 2453 // we don't need to sxtw for use as an x since the shift distances are 2454 // all & reg_bits - 1. 
2455 __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type)); 2456 } 2457 } else { 2458 DCHECK(instr->IsXor()); 2459 __ Eor(dst, lhs, rhs); 2460 } 2461 break; 2462 } 2463 case Primitive::kPrimFloat: 2464 case Primitive::kPrimDouble: { 2465 FPRegister dst = OutputFPRegister(instr); 2466 FPRegister lhs = InputFPRegisterAt(instr, 0); 2467 FPRegister rhs = InputFPRegisterAt(instr, 1); 2468 if (instr->IsAdd()) { 2469 __ Fadd(dst, lhs, rhs); 2470 } else if (instr->IsSub()) { 2471 __ Fsub(dst, lhs, rhs); 2472 } else { 2473 LOG(FATAL) << "Unexpected floating-point binary operation"; 2474 } 2475 break; 2476 } 2477 default: 2478 LOG(FATAL) << "Unexpected binary operation type " << type; 2479 } 2480 } 2481 2482 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) { 2483 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); 2484 2485 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); 2486 Primitive::Type type = instr->GetResultType(); 2487 switch (type) { 2488 case Primitive::kPrimInt: 2489 case Primitive::kPrimLong: { 2490 locations->SetInAt(0, Location::RequiresRegister()); 2491 locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); 2492 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2493 break; 2494 } 2495 default: 2496 LOG(FATAL) << "Unexpected shift type " << type; 2497 } 2498 } 2499 2500 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) { 2501 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); 2502 2503 Primitive::Type type = instr->GetType(); 2504 switch (type) { 2505 case Primitive::kPrimInt: 2506 case Primitive::kPrimLong: { 2507 Register dst = OutputRegister(instr); 2508 Register lhs = InputRegisterAt(instr, 0); 2509 Operand rhs = InputOperandAt(instr, 1); 2510 if (rhs.IsImmediate()) { 2511 uint32_t shift_value = rhs.GetImmediate() & 2512 (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance); 2513 if (instr->IsShl()) { 2514 __ Lsl(dst, lhs, shift_value); 2515 } else if (instr->IsShr()) { 2516 __ Asr(dst, lhs, shift_value); 2517 } else { 2518 __ Lsr(dst, lhs, shift_value); 2519 } 2520 } else { 2521 Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W(); 2522 2523 if (instr->IsShl()) { 2524 __ Lsl(dst, lhs, rhs_reg); 2525 } else if (instr->IsShr()) { 2526 __ Asr(dst, lhs, rhs_reg); 2527 } else { 2528 __ Lsr(dst, lhs, rhs_reg); 2529 } 2530 } 2531 break; 2532 } 2533 default: 2534 LOG(FATAL) << "Unexpected shift operation type " << type; 2535 } 2536 } 2537 2538 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) { 2539 HandleBinaryOp(instruction); 2540 } 2541 2542 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) { 2543 HandleBinaryOp(instruction); 2544 } 2545 2546 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) { 2547 HandleBinaryOp(instruction); 2548 } 2549 2550 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { 2551 HandleBinaryOp(instruction); 2552 } 2553 2554 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { 2555 DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType(); 2556 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); 2557 locations->SetInAt(0, Location::RequiresRegister()); 2558 // There is no immediate variant of negated bitwise instructions in AArch64. 
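// Note: unlike AND, ORR and EOR, the negated forms BIC, ORN and EON only accept a (possibly
// shifted) register as their second operand.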
2559 locations->SetInAt(1, Location::RequiresRegister()); 2560 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2561 } 2562 2563 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { 2564 Register dst = OutputRegister(instr); 2565 Register lhs = InputRegisterAt(instr, 0); 2566 Register rhs = InputRegisterAt(instr, 1); 2567 2568 switch (instr->GetOpKind()) { 2569 case HInstruction::kAnd: 2570 __ Bic(dst, lhs, rhs); 2571 break; 2572 case HInstruction::kOr: 2573 __ Orn(dst, lhs, rhs); 2574 break; 2575 case HInstruction::kXor: 2576 __ Eon(dst, lhs, rhs); 2577 break; 2578 default: 2579 LOG(FATAL) << "Unreachable"; 2580 } 2581 } 2582 2583 void LocationsBuilderARM64::VisitDataProcWithShifterOp( 2584 HDataProcWithShifterOp* instruction) { 2585 DCHECK(instruction->GetType() == Primitive::kPrimInt || 2586 instruction->GetType() == Primitive::kPrimLong); 2587 LocationSummary* locations = 2588 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); 2589 if (instruction->GetInstrKind() == HInstruction::kNeg) { 2590 locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); 2591 } else { 2592 locations->SetInAt(0, Location::RequiresRegister()); 2593 } 2594 locations->SetInAt(1, Location::RequiresRegister()); 2595 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2596 } 2597 2598 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp( 2599 HDataProcWithShifterOp* instruction) { 2600 Primitive::Type type = instruction->GetType(); 2601 HInstruction::InstructionKind kind = instruction->GetInstrKind(); 2602 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); 2603 Register out = OutputRegister(instruction); 2604 Register left; 2605 if (kind != HInstruction::kNeg) { 2606 left = InputRegisterAt(instruction, 0); 2607 } 2608 // If this `HDataProcWithShifterOp` was created by merging a type conversion as the 2609 // shifter operand operation, the IR generating `right_reg` (input to the type 2610 // conversion) can have a different type from the current instruction's type, 2611 // so we manually indicate the type. 2612 Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); 2613 Operand right_operand(0); 2614 2615 HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); 2616 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { 2617 right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); 2618 } else { 2619 right_operand = Operand(right_reg, 2620 helpers::ShiftFromOpKind(op_kind), 2621 instruction->GetShiftAmount()); 2622 } 2623 2624 // Logical binary operations do not support extension operations in the 2625 // operand. Note that VIXL would still manage if it was passed by generating 2626 // the extension as a separate instruction. 2627 // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. 
2628 DCHECK(!right_operand.IsExtendedRegister() || 2629 (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor && 2630 kind != HInstruction::kNeg)); 2631 switch (kind) { 2632 case HInstruction::kAdd: 2633 __ Add(out, left, right_operand); 2634 break; 2635 case HInstruction::kAnd: 2636 __ And(out, left, right_operand); 2637 break; 2638 case HInstruction::kNeg: 2639 DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero()); 2640 __ Neg(out, right_operand); 2641 break; 2642 case HInstruction::kOr: 2643 __ Orr(out, left, right_operand); 2644 break; 2645 case HInstruction::kSub: 2646 __ Sub(out, left, right_operand); 2647 break; 2648 case HInstruction::kXor: 2649 __ Eor(out, left, right_operand); 2650 break; 2651 default: 2652 LOG(FATAL) << "Unexpected operation kind: " << kind; 2653 UNREACHABLE(); 2654 } 2655 } 2656 2657 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { 2658 LocationSummary* locations = 2659 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); 2660 locations->SetInAt(0, Location::RequiresRegister()); 2661 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction)); 2662 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2663 } 2664 2665 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { 2666 __ Add(OutputRegister(instruction), 2667 InputRegisterAt(instruction, 0), 2668 Operand(InputOperandAt(instruction, 1))); 2669 } 2670 2671 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) { 2672 LocationSummary* locations = 2673 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); 2674 2675 HIntConstant* shift = instruction->GetShift()->AsIntConstant(); 2676 2677 locations->SetInAt(0, Location::RequiresRegister()); 2678 // For byte case we don't need to shift the index variable so we can encode the data offset into 2679 // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist 2680 // data offset constant generation out of the loop and reduce the critical path length in the 2681 // loop. 2682 locations->SetInAt(1, shift->GetValue() == 0 2683 ? 
Location::ConstantLocation(instruction->GetOffset()->AsIntConstant()) 2684 : Location::RequiresRegister()); 2685 locations->SetInAt(2, Location::ConstantLocation(shift)); 2686 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2687 } 2688 2689 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( 2690 HIntermediateAddressIndex* instruction) { 2691 Register index_reg = InputRegisterAt(instruction, 0); 2692 uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2)); 2693 uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); 2694 2695 if (shift == 0) { 2696 __ Add(OutputRegister(instruction), index_reg, offset); 2697 } else { 2698 Register offset_reg = InputRegisterAt(instruction, 1); 2699 __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift)); 2700 } 2701 } 2702 2703 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { 2704 LocationSummary* locations = 2705 new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); 2706 HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); 2707 if (instr->GetOpKind() == HInstruction::kSub && 2708 accumulator->IsConstant() && 2709 accumulator->AsConstant()->IsArithmeticZero()) { 2710 // Don't allocate register for Mneg instruction. 2711 } else { 2712 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, 2713 Location::RequiresRegister()); 2714 } 2715 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); 2716 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); 2717 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2718 } 2719 2720 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { 2721 Register res = OutputRegister(instr); 2722 Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex); 2723 Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex); 2724 2725 // Avoid emitting code that could trigger Cortex A53's erratum 835769. 2726 // This fixup should be carried out for all multiply-accumulate instructions: 2727 // madd, msub, smaddl, smsubl, umaddl and umsubl. 2728 if (instr->GetType() == Primitive::kPrimLong && 2729 codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { 2730 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); 2731 vixl::aarch64::Instruction* prev = 2732 masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize; 2733 if (prev->IsLoadOrStore()) { 2734 // Make sure we emit only exactly one nop. 
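// Note: a single nop between the preceding load/store and the 64-bit multiply-accumulate is
// enough to break up the instruction sequence affected by the erratum.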
2735 ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2736 __ nop(); 2737 } 2738 } 2739 2740 if (instr->GetOpKind() == HInstruction::kAdd) { 2741 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); 2742 __ Madd(res, mul_left, mul_right, accumulator); 2743 } else { 2744 DCHECK(instr->GetOpKind() == HInstruction::kSub); 2745 HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); 2746 if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) { 2747 __ Mneg(res, mul_left, mul_right); 2748 } else { 2749 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); 2750 __ Msub(res, mul_left, mul_right, accumulator); 2751 } 2752 } 2753 } 2754 2755 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { 2756 bool object_array_get_with_read_barrier = 2757 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); 2758 LocationSummary* locations = 2759 new (GetGraph()->GetArena()) LocationSummary(instruction, 2760 object_array_get_with_read_barrier ? 2761 LocationSummary::kCallOnSlowPath : 2762 LocationSummary::kNoCall); 2763 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 2764 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 2765 // We need a temporary register for the read barrier marking slow 2766 // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier. 2767 if (kBakerReadBarrierLinkTimeThunksEnableForFields && 2768 !Runtime::Current()->UseJitCompilation() && 2769 instruction->GetIndex()->IsConstant()) { 2770 // Array loads with constant index are treated as field loads. 2771 // If link-time thunks for the Baker read barrier are enabled, for AOT 2772 // constant index loads we need a temporary only if the offset is too big. 2773 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 2774 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); 2775 offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); 2776 if (offset >= kReferenceLoadMinFarOffset) { 2777 locations->AddTemp(FixedTempLocation()); 2778 } 2779 } else { 2780 locations->AddTemp(Location::RequiresRegister()); 2781 } 2782 } 2783 locations->SetInAt(0, Location::RequiresRegister()); 2784 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 2785 if (Primitive::IsFloatingPointType(instruction->GetType())) { 2786 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2787 } else { 2788 // The output overlaps in the case of an object array get with 2789 // read barriers enabled: we do not want the move to overwrite the 2790 // array's location, as we need it to emit the read barrier. 2791 locations->SetOut( 2792 Location::RequiresRegister(), 2793 object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 2794 } 2795 } 2796 2797 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { 2798 Primitive::Type type = instruction->GetType(); 2799 Register obj = InputRegisterAt(instruction, 0); 2800 LocationSummary* locations = instruction->GetLocations(); 2801 Location index = locations->InAt(1); 2802 Location out = locations->Out(); 2803 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 2804 const bool maybe_compressed_char_at = mirror::kUseStringCompression && 2805 instruction->IsStringCharAt(); 2806 MacroAssembler* masm = GetVIXLAssembler(); 2807 UseScratchRegisterScope temps(masm); 2808 2809 // The read barrier instrumentation of object ArrayGet instructions 2810 // does not support the HIntermediateAddress instruction. 2811 DCHECK(!((type == Primitive::kPrimNot) && 2812 instruction->GetArray()->IsIntermediateAddress() && 2813 kEmitCompilerReadBarrier)); 2814 2815 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2816 // Object ArrayGet with Baker's read barrier case. 2817 // Note that a potential implicit null check is handled in the 2818 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. 2819 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); 2820 if (index.IsConstant()) { 2821 // Array load with a constant index can be treated as a field load. 2822 offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); 2823 Location maybe_temp = 2824 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); 2825 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, 2826 out, 2827 obj.W(), 2828 offset, 2829 maybe_temp, 2830 /* needs_null_check */ false, 2831 /* use_load_acquire */ false); 2832 } else { 2833 Register temp = WRegisterFrom(locations->GetTemp(0)); 2834 codegen_->GenerateArrayLoadWithBakerReadBarrier( 2835 instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false); 2836 } 2837 } else { 2838 // General case. 2839 MemOperand source = HeapOperand(obj); 2840 Register length; 2841 if (maybe_compressed_char_at) { 2842 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 2843 length = temps.AcquireW(); 2844 { 2845 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2846 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2847 2848 if (instruction->GetArray()->IsIntermediateAddress()) { 2849 DCHECK_LT(count_offset, offset); 2850 int64_t adjusted_offset = 2851 static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset); 2852 // Note that `adjusted_offset` is negative, so this will be a LDUR. 
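          // `obj` holds the HIntermediateAddress (array base plus data offset), so adding the
          // negative `adjusted_offset` re-bases the load onto the String's count_ field.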
2853 __ Ldr(length, MemOperand(obj.X(), adjusted_offset)); 2854 } else { 2855 __ Ldr(length, HeapOperand(obj, count_offset)); 2856 } 2857 codegen_->MaybeRecordImplicitNullCheck(instruction); 2858 } 2859 } 2860 if (index.IsConstant()) { 2861 if (maybe_compressed_char_at) { 2862 vixl::aarch64::Label uncompressed_load, done; 2863 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 2864 "Expecting 0=compressed, 1=uncompressed"); 2865 __ Tbnz(length.W(), 0, &uncompressed_load); 2866 __ Ldrb(Register(OutputCPURegister(instruction)), 2867 HeapOperand(obj, offset + Int64ConstantFrom(index))); 2868 __ B(&done); 2869 __ Bind(&uncompressed_load); 2870 __ Ldrh(Register(OutputCPURegister(instruction)), 2871 HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); 2872 __ Bind(&done); 2873 } else { 2874 offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); 2875 source = HeapOperand(obj, offset); 2876 } 2877 } else { 2878 Register temp = temps.AcquireSameSizeAs(obj); 2879 if (instruction->GetArray()->IsIntermediateAddress()) { 2880 // We do not need to compute the intermediate address from the array: the 2881 // input instruction has done it already. See the comment in 2882 // `TryExtractArrayAccessAddress()`. 2883 if (kIsDebugBuild) { 2884 HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); 2885 DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); 2886 } 2887 temp = obj; 2888 } else { 2889 __ Add(temp, obj, offset); 2890 } 2891 if (maybe_compressed_char_at) { 2892 vixl::aarch64::Label uncompressed_load, done; 2893 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 2894 "Expecting 0=compressed, 1=uncompressed"); 2895 __ Tbnz(length.W(), 0, &uncompressed_load); 2896 __ Ldrb(Register(OutputCPURegister(instruction)), 2897 HeapOperand(temp, XRegisterFrom(index), LSL, 0)); 2898 __ B(&done); 2899 __ Bind(&uncompressed_load); 2900 __ Ldrh(Register(OutputCPURegister(instruction)), 2901 HeapOperand(temp, XRegisterFrom(index), LSL, 1)); 2902 __ Bind(&done); 2903 } else { 2904 source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); 2905 } 2906 } 2907 if (!maybe_compressed_char_at) { 2908 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
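      // MaybeRecordImplicitNullCheck() associates the current code offset with the potentially
      // faulting load; a literal pool emitted in between would break that association.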
2909 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2910 codegen_->Load(type, OutputCPURegister(instruction), source); 2911 codegen_->MaybeRecordImplicitNullCheck(instruction); 2912 } 2913 2914 if (type == Primitive::kPrimNot) { 2915 static_assert( 2916 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 2917 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 2918 Location obj_loc = locations->InAt(0); 2919 if (index.IsConstant()) { 2920 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset); 2921 } else { 2922 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index); 2923 } 2924 } 2925 } 2926 } 2927 2928 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { 2929 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 2930 locations->SetInAt(0, Location::RequiresRegister()); 2931 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2932 } 2933 2934 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { 2935 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); 2936 vixl::aarch64::Register out = OutputRegister(instruction); 2937 { 2938 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2939 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2940 __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset)); 2941 codegen_->MaybeRecordImplicitNullCheck(instruction); 2942 } 2943 // Mask out compression flag from String's array length. 2944 if (mirror::kUseStringCompression && instruction->IsStringLength()) { 2945 __ Lsr(out.W(), out.W(), 1u); 2946 } 2947 } 2948 2949 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { 2950 Primitive::Type value_type = instruction->GetComponentType(); 2951 2952 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 2953 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( 2954 instruction, 2955 may_need_runtime_call_for_type_check ? 
2956 LocationSummary::kCallOnSlowPath : 2957 LocationSummary::kNoCall); 2958 locations->SetInAt(0, Location::RequiresRegister()); 2959 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 2960 if (IsConstantZeroBitPattern(instruction->InputAt(2))) { 2961 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 2962 } else if (Primitive::IsFloatingPointType(value_type)) { 2963 locations->SetInAt(2, Location::RequiresFpuRegister()); 2964 } else { 2965 locations->SetInAt(2, Location::RequiresRegister()); 2966 } 2967 } 2968 2969 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { 2970 Primitive::Type value_type = instruction->GetComponentType(); 2971 LocationSummary* locations = instruction->GetLocations(); 2972 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 2973 bool needs_write_barrier = 2974 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 2975 2976 Register array = InputRegisterAt(instruction, 0); 2977 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2); 2978 CPURegister source = value; 2979 Location index = locations->InAt(1); 2980 size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); 2981 MemOperand destination = HeapOperand(array); 2982 MacroAssembler* masm = GetVIXLAssembler(); 2983 2984 if (!needs_write_barrier) { 2985 DCHECK(!may_need_runtime_call_for_type_check); 2986 if (index.IsConstant()) { 2987 offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); 2988 destination = HeapOperand(array, offset); 2989 } else { 2990 UseScratchRegisterScope temps(masm); 2991 Register temp = temps.AcquireSameSizeAs(array); 2992 if (instruction->GetArray()->IsIntermediateAddress()) { 2993 // We do not need to compute the intermediate address from the array: the 2994 // input instruction has done it already. See the comment in 2995 // `TryExtractArrayAccessAddress()`. 2996 if (kIsDebugBuild) { 2997 HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); 2998 DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); 2999 } 3000 temp = array; 3001 } else { 3002 __ Add(temp, array, offset); 3003 } 3004 destination = HeapOperand(temp, 3005 XRegisterFrom(index), 3006 LSL, 3007 Primitive::ComponentSizeShift(value_type)); 3008 } 3009 { 3010 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 3011 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 3012 codegen_->Store(value_type, value, destination); 3013 codegen_->MaybeRecordImplicitNullCheck(instruction); 3014 } 3015 } else { 3016 DCHECK(!instruction->GetArray()->IsIntermediateAddress()); 3017 vixl::aarch64::Label done; 3018 SlowPathCodeARM64* slow_path = nullptr; 3019 { 3020 // We use a block to end the scratch scope before the write barrier, thus 3021 // freeing the temporary registers so they can be used in `MarkGCCard`. 
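      // If a runtime type check may be needed, the value's class is compared inline below
      // against the array's component type; ArraySetSlowPathARM64 handles the cases that the
      // inline check cannot resolve.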
3022 UseScratchRegisterScope temps(masm); 3023 Register temp = temps.AcquireSameSizeAs(array); 3024 if (index.IsConstant()) { 3025 offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); 3026 destination = HeapOperand(array, offset); 3027 } else { 3028 destination = HeapOperand(temp, 3029 XRegisterFrom(index), 3030 LSL, 3031 Primitive::ComponentSizeShift(value_type)); 3032 } 3033 3034 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3035 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3036 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3037 3038 if (may_need_runtime_call_for_type_check) { 3039 slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction); 3040 codegen_->AddSlowPath(slow_path); 3041 if (instruction->GetValueCanBeNull()) { 3042 vixl::aarch64::Label non_zero; 3043 __ Cbnz(Register(value), &non_zero); 3044 if (!index.IsConstant()) { 3045 __ Add(temp, array, offset); 3046 } 3047 { 3048 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools 3049 // emitted. 3050 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 3051 __ Str(wzr, destination); 3052 codegen_->MaybeRecordImplicitNullCheck(instruction); 3053 } 3054 __ B(&done); 3055 __ Bind(&non_zero); 3056 } 3057 3058 // Note that when Baker read barriers are enabled, the type 3059 // checks are performed without read barriers. This is fine, 3060 // even in the case where a class object is in the from-space 3061 // after the flip, as a comparison involving such a type would 3062 // not produce a false positive; it may of course produce a 3063 // false negative, in which case we would take the ArraySet 3064 // slow path. 3065 3066 Register temp2 = temps.AcquireSameSizeAs(array); 3067 // /* HeapReference<Class> */ temp = array->klass_ 3068 { 3069 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 3070 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 3071 __ Ldr(temp, HeapOperand(array, class_offset)); 3072 codegen_->MaybeRecordImplicitNullCheck(instruction); 3073 } 3074 GetAssembler()->MaybeUnpoisonHeapReference(temp); 3075 3076 // /* HeapReference<Class> */ temp = temp->component_type_ 3077 __ Ldr(temp, HeapOperand(temp, component_offset)); 3078 // /* HeapReference<Class> */ temp2 = value->klass_ 3079 __ Ldr(temp2, HeapOperand(Register(value), class_offset)); 3080 // If heap poisoning is enabled, no need to unpoison `temp` 3081 // nor `temp2`, as we are comparing two poisoned references. 3082 __ Cmp(temp, temp2); 3083 temps.Release(temp2); 3084 3085 if (instruction->StaticTypeOfArrayIsObjectArray()) { 3086 vixl::aarch64::Label do_put; 3087 __ B(eq, &do_put); 3088 // If heap poisoning is enabled, the `temp` reference has 3089 // not been unpoisoned yet; unpoison it now. 3090 GetAssembler()->MaybeUnpoisonHeapReference(temp); 3091 3092 // /* HeapReference<Class> */ temp = temp->super_class_ 3093 __ Ldr(temp, HeapOperand(temp, super_offset)); 3094 // If heap poisoning is enabled, no need to unpoison 3095 // `temp`, as we are comparing against null below. 
3096 __ Cbnz(temp, slow_path->GetEntryLabel()); 3097 __ Bind(&do_put); 3098 } else { 3099 __ B(ne, slow_path->GetEntryLabel()); 3100 } 3101 } 3102 3103 if (kPoisonHeapReferences) { 3104 Register temp2 = temps.AcquireSameSizeAs(array); 3105 DCHECK(value.IsW()); 3106 __ Mov(temp2, value.W()); 3107 GetAssembler()->PoisonHeapReference(temp2); 3108 source = temp2; 3109 } 3110 3111 if (!index.IsConstant()) { 3112 __ Add(temp, array, offset); 3113 } else { 3114 // We no longer need the `temp` here so release it as the store below may 3115 // need a scratch register (if the constant index makes the offset too large) 3116 // and the poisoned `source` could be using the other scratch register. 3117 temps.Release(temp); 3118 } 3119 { 3120 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 3121 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 3122 __ Str(source, destination); 3123 3124 if (!may_need_runtime_call_for_type_check) { 3125 codegen_->MaybeRecordImplicitNullCheck(instruction); 3126 } 3127 } 3128 } 3129 3130 codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull()); 3131 3132 if (done.IsLinked()) { 3133 __ Bind(&done); 3134 } 3135 3136 if (slow_path != nullptr) { 3137 __ Bind(slow_path->GetExitLabel()); 3138 } 3139 } 3140 } 3141 3142 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { 3143 RegisterSet caller_saves = RegisterSet::Empty(); 3144 InvokeRuntimeCallingConvention calling_convention; 3145 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 3146 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode())); 3147 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); 3148 locations->SetInAt(0, Location::RequiresRegister()); 3149 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); 3150 } 3151 3152 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) { 3153 BoundsCheckSlowPathARM64* slow_path = 3154 new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction); 3155 codegen_->AddSlowPath(slow_path); 3156 __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); 3157 __ B(slow_path->GetEntryLabel(), hs); 3158 } 3159 3160 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { 3161 LocationSummary* locations = 3162 new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 3163 locations->SetInAt(0, Location::RequiresRegister()); 3164 if (check->HasUses()) { 3165 locations->SetOut(Location::SameAsFirstInput()); 3166 } 3167 } 3168 3169 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { 3170 // We assume the class is not null. 
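  // The check itself is emitted by GenerateClassInitializationCheck(), which branches to this
  // slow path when the class is not yet initialized; the slow path then calls the runtime to
  // perform the initialization.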
3171 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( 3172 check->GetLoadClass(), check, check->GetDexPc(), true); 3173 codegen_->AddSlowPath(slow_path); 3174 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); 3175 } 3176 3177 static bool IsFloatingPointZeroConstant(HInstruction* inst) { 3178 return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero())) 3179 || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero())); 3180 } 3181 3182 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) { 3183 FPRegister lhs_reg = InputFPRegisterAt(instruction, 0); 3184 Location rhs_loc = instruction->GetLocations()->InAt(1); 3185 if (rhs_loc.IsConstant()) { 3186 // 0.0 is the only immediate that can be encoded directly in 3187 // an FCMP instruction. 3188 // 3189 // Both the JLS (section 15.20.1) and the JVMS (section 6.5) 3190 // specify that in a floating-point comparison, positive zero 3191 // and negative zero are considered equal, so we can use the 3192 // literal 0.0 for both cases here. 3193 // 3194 // Note however that some methods (Float.equal, Float.compare, 3195 // Float.compareTo, Double.equal, Double.compare, 3196 // Double.compareTo, Math.max, Math.min, StrictMath.max, 3197 // StrictMath.min) consider 0.0 to be (strictly) greater than 3198 // -0.0. So if we ever translate calls to these methods into a 3199 // HCompare instruction, we must handle the -0.0 case with 3200 // care here. 3201 DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant())); 3202 __ Fcmp(lhs_reg, 0.0); 3203 } else { 3204 __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1)); 3205 } 3206 } 3207 3208 void LocationsBuilderARM64::VisitCompare(HCompare* compare) { 3209 LocationSummary* locations = 3210 new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); 3211 Primitive::Type in_type = compare->InputAt(0)->GetType(); 3212 switch (in_type) { 3213 case Primitive::kPrimBoolean: 3214 case Primitive::kPrimByte: 3215 case Primitive::kPrimShort: 3216 case Primitive::kPrimChar: 3217 case Primitive::kPrimInt: 3218 case Primitive::kPrimLong: { 3219 locations->SetInAt(0, Location::RequiresRegister()); 3220 locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare)); 3221 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3222 break; 3223 } 3224 case Primitive::kPrimFloat: 3225 case Primitive::kPrimDouble: { 3226 locations->SetInAt(0, Location::RequiresFpuRegister()); 3227 locations->SetInAt(1, 3228 IsFloatingPointZeroConstant(compare->InputAt(1)) 3229 ? 
Location::ConstantLocation(compare->InputAt(1)->AsConstant()) 3230 : Location::RequiresFpuRegister()); 3231 locations->SetOut(Location::RequiresRegister()); 3232 break; 3233 } 3234 default: 3235 LOG(FATAL) << "Unexpected type for compare operation " << in_type; 3236 } 3237 } 3238 3239 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { 3240 Primitive::Type in_type = compare->InputAt(0)->GetType(); 3241 3242 // 0 if: left == right 3243 // 1 if: left > right 3244 // -1 if: left < right 3245 switch (in_type) { 3246 case Primitive::kPrimBoolean: 3247 case Primitive::kPrimByte: 3248 case Primitive::kPrimShort: 3249 case Primitive::kPrimChar: 3250 case Primitive::kPrimInt: 3251 case Primitive::kPrimLong: { 3252 Register result = OutputRegister(compare); 3253 Register left = InputRegisterAt(compare, 0); 3254 Operand right = InputOperandAt(compare, 1); 3255 __ Cmp(left, right); 3256 __ Cset(result, ne); // result == +1 if NE or 0 otherwise 3257 __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise 3258 break; 3259 } 3260 case Primitive::kPrimFloat: 3261 case Primitive::kPrimDouble: { 3262 Register result = OutputRegister(compare); 3263 GenerateFcmp(compare); 3264 __ Cset(result, ne); 3265 __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias())); 3266 break; 3267 } 3268 default: 3269 LOG(FATAL) << "Unimplemented compare type " << in_type; 3270 } 3271 } 3272 3273 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { 3274 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 3275 3276 if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { 3277 locations->SetInAt(0, Location::RequiresFpuRegister()); 3278 locations->SetInAt(1, 3279 IsFloatingPointZeroConstant(instruction->InputAt(1)) 3280 ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant()) 3281 : Location::RequiresFpuRegister()); 3282 } else { 3283 // Integer cases. 3284 locations->SetInAt(0, Location::RequiresRegister()); 3285 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); 3286 } 3287 3288 if (!instruction->IsEmittedAtUseSite()) { 3289 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3290 } 3291 } 3292 3293 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { 3294 if (instruction->IsEmittedAtUseSite()) { 3295 return; 3296 } 3297 3298 LocationSummary* locations = instruction->GetLocations(); 3299 Register res = RegisterFrom(locations->Out(), instruction->GetType()); 3300 IfCondition if_cond = instruction->GetCondition(); 3301 3302 if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { 3303 GenerateFcmp(instruction); 3304 __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias())); 3305 } else { 3306 // Integer cases. 
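    // Compare the integer inputs and materialize the condition: Cset writes 1 to the result
    // register when the ARM condition holds and 0 otherwise.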
3307 Register lhs = InputRegisterAt(instruction, 0); 3308 Operand rhs = InputOperandAt(instruction, 1); 3309 __ Cmp(lhs, rhs); 3310 __ Cset(res, ARM64Condition(if_cond)); 3311 } 3312 } 3313 3314 #define FOR_EACH_CONDITION_INSTRUCTION(M) \ 3315 M(Equal) \ 3316 M(NotEqual) \ 3317 M(LessThan) \ 3318 M(LessThanOrEqual) \ 3319 M(GreaterThan) \ 3320 M(GreaterThanOrEqual) \ 3321 M(Below) \ 3322 M(BelowOrEqual) \ 3323 M(Above) \ 3324 M(AboveOrEqual) 3325 #define DEFINE_CONDITION_VISITORS(Name) \ 3326 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \ 3327 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } 3328 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) 3329 #undef DEFINE_CONDITION_VISITORS 3330 #undef FOR_EACH_CONDITION_INSTRUCTION 3331 3332 void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { 3333 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3334 3335 LocationSummary* locations = instruction->GetLocations(); 3336 Location second = locations->InAt(1); 3337 DCHECK(second.IsConstant()); 3338 3339 Register out = OutputRegister(instruction); 3340 Register dividend = InputRegisterAt(instruction, 0); 3341 int64_t imm = Int64FromConstant(second.GetConstant()); 3342 DCHECK(imm == 1 || imm == -1); 3343 3344 if (instruction->IsRem()) { 3345 __ Mov(out, 0); 3346 } else { 3347 if (imm == 1) { 3348 __ Mov(out, dividend); 3349 } else { 3350 __ Neg(out, dividend); 3351 } 3352 } 3353 } 3354 3355 void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { 3356 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3357 3358 LocationSummary* locations = instruction->GetLocations(); 3359 Location second = locations->InAt(1); 3360 DCHECK(second.IsConstant()); 3361 3362 Register out = OutputRegister(instruction); 3363 Register dividend = InputRegisterAt(instruction, 0); 3364 int64_t imm = Int64FromConstant(second.GetConstant()); 3365 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); 3366 int ctz_imm = CTZ(abs_imm); 3367 3368 UseScratchRegisterScope temps(GetVIXLAssembler()); 3369 Register temp = temps.AcquireSameSizeAs(out); 3370 3371 if (instruction->IsDiv()) { 3372 __ Add(temp, dividend, abs_imm - 1); 3373 __ Cmp(dividend, 0); 3374 __ Csel(out, temp, dividend, lt); 3375 if (imm > 0) { 3376 __ Asr(out, out, ctz_imm); 3377 } else { 3378 __ Neg(out, Operand(out, ASR, ctz_imm)); 3379 } 3380 } else { 3381 int bits = instruction->GetResultType() == Primitive::kPrimInt ? 
32 : 64; 3382 __ Asr(temp, dividend, bits - 1); 3383 __ Lsr(temp, temp, bits - ctz_imm); 3384 __ Add(out, dividend, temp); 3385 __ And(out, out, abs_imm - 1); 3386 __ Sub(out, out, temp); 3387 } 3388 } 3389 3390 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { 3391 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3392 3393 LocationSummary* locations = instruction->GetLocations(); 3394 Location second = locations->InAt(1); 3395 DCHECK(second.IsConstant()); 3396 3397 Register out = OutputRegister(instruction); 3398 Register dividend = InputRegisterAt(instruction, 0); 3399 int64_t imm = Int64FromConstant(second.GetConstant()); 3400 3401 Primitive::Type type = instruction->GetResultType(); 3402 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); 3403 3404 int64_t magic; 3405 int shift; 3406 CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift); 3407 3408 UseScratchRegisterScope temps(GetVIXLAssembler()); 3409 Register temp = temps.AcquireSameSizeAs(out); 3410 3411 // temp = get_high(dividend * magic) 3412 __ Mov(temp, magic); 3413 if (type == Primitive::kPrimLong) { 3414 __ Smulh(temp, dividend, temp); 3415 } else { 3416 __ Smull(temp.X(), dividend, temp); 3417 __ Lsr(temp.X(), temp.X(), 32); 3418 } 3419 3420 if (imm > 0 && magic < 0) { 3421 __ Add(temp, temp, dividend); 3422 } else if (imm < 0 && magic > 0) { 3423 __ Sub(temp, temp, dividend); 3424 } 3425 3426 if (shift != 0) { 3427 __ Asr(temp, temp, shift); 3428 } 3429 3430 if (instruction->IsDiv()) { 3431 __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31)); 3432 } else { 3433 __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31)); 3434 // TODO: Strength reduction for msub. 3435 Register temp_imm = temps.AcquireSameSizeAs(out); 3436 __ Mov(temp_imm, imm); 3437 __ Msub(out, temp, temp_imm, dividend); 3438 } 3439 } 3440 3441 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { 3442 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3443 Primitive::Type type = instruction->GetResultType(); 3444 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); 3445 3446 LocationSummary* locations = instruction->GetLocations(); 3447 Register out = OutputRegister(instruction); 3448 Location second = locations->InAt(1); 3449 3450 if (second.IsConstant()) { 3451 int64_t imm = Int64FromConstant(second.GetConstant()); 3452 3453 if (imm == 0) { 3454 // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
    } else if (imm == 1 || imm == -1) {
      DivRemOneOrMinusOne(instruction);
    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
      DivRemByPowerOfTwo(instruction);
    } else {
      DCHECK(imm <= -2 || imm >= 2);
      GenerateDivRemWithAnyConstant(instruction);
    }
  } else {
    Register dividend = InputRegisterAt(instruction, 0);
    Register divisor = InputRegisterAt(instruction, 1);
    if (instruction->IsDiv()) {
      __ Sdiv(out, dividend, divisor);
    } else {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      Register temp = temps.AcquireSameSizeAs(out);
      __ Sdiv(temp, dividend, divisor);
      __ Msub(out, temp, divisor, dividend);
    }
  }
}

void LocationsBuilderARM64::VisitDiv(HDiv* div) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
  switch (div->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;

    default:
      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
  }
}

void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
  Primitive::Type type = div->GetResultType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      GenerateDivRemIntegral(div);
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
      break;

    default:
      LOG(FATAL) << "Unexpected div type " << type;
  }
}

void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
}

void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
  SlowPathCodeARM64* slow_path =
      new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction);
  codegen_->AddSlowPath(slow_path);
  Location value = instruction->GetLocations()->InAt(0);

  Primitive::Type type = instruction->GetType();

  if (!Primitive::IsIntegralType(type)) {
    LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
    return;
  }

  if (value.IsConstant()) {
    int64_t divisor = Int64ConstantFrom(value);
    if (divisor == 0) {
      __ B(slow_path->GetEntryLabel());
    } else {
      // A division by a non-zero constant is valid. We don't need to perform
      // any check, so simply fall through.
3543 } 3544 } else { 3545 __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); 3546 } 3547 } 3548 3549 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) { 3550 LocationSummary* locations = 3551 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); 3552 locations->SetOut(Location::ConstantLocation(constant)); 3553 } 3554 3555 void InstructionCodeGeneratorARM64::VisitDoubleConstant( 3556 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 3557 // Will be generated at use site. 3558 } 3559 3560 void LocationsBuilderARM64::VisitExit(HExit* exit) { 3561 exit->SetLocations(nullptr); 3562 } 3563 3564 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 3565 } 3566 3567 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { 3568 LocationSummary* locations = 3569 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); 3570 locations->SetOut(Location::ConstantLocation(constant)); 3571 } 3572 3573 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 3574 // Will be generated at use site. 3575 } 3576 3577 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) { 3578 DCHECK(!successor->IsExitBlock()); 3579 HBasicBlock* block = got->GetBlock(); 3580 HInstruction* previous = got->GetPrevious(); 3581 HLoopInformation* info = block->GetLoopInformation(); 3582 3583 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 3584 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); 3585 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 3586 return; 3587 } 3588 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 3589 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 3590 } 3591 if (!codegen_->GoesToNextBlock(block, successor)) { 3592 __ B(codegen_->GetLabelOf(successor)); 3593 } 3594 } 3595 3596 void LocationsBuilderARM64::VisitGoto(HGoto* got) { 3597 got->SetLocations(nullptr); 3598 } 3599 3600 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { 3601 HandleGoto(got, got->GetSuccessor()); 3602 } 3603 3604 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) { 3605 try_boundary->SetLocations(nullptr); 3606 } 3607 3608 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) { 3609 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); 3610 if (!successor->IsExitBlock()) { 3611 HandleGoto(try_boundary, successor); 3612 } 3613 } 3614 3615 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, 3616 size_t condition_input_index, 3617 vixl::aarch64::Label* true_target, 3618 vixl::aarch64::Label* false_target) { 3619 HInstruction* cond = instruction->InputAt(condition_input_index); 3620 3621 if (true_target == nullptr && false_target == nullptr) { 3622 // Nothing to do. The code always falls through. 3623 return; 3624 } else if (cond->IsIntConstant()) { 3625 // Constant condition, statically compared against "true" (integer value 1). 
3626 if (cond->AsIntConstant()->IsTrue()) { 3627 if (true_target != nullptr) { 3628 __ B(true_target); 3629 } 3630 } else { 3631 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 3632 if (false_target != nullptr) { 3633 __ B(false_target); 3634 } 3635 } 3636 return; 3637 } 3638 3639 // The following code generates these patterns: 3640 // (1) true_target == nullptr && false_target != nullptr 3641 // - opposite condition true => branch to false_target 3642 // (2) true_target != nullptr && false_target == nullptr 3643 // - condition true => branch to true_target 3644 // (3) true_target != nullptr && false_target != nullptr 3645 // - condition true => branch to true_target 3646 // - branch to false_target 3647 if (IsBooleanValueOrMaterializedCondition(cond)) { 3648 // The condition instruction has been materialized, compare the output to 0. 3649 Location cond_val = instruction->GetLocations()->InAt(condition_input_index); 3650 DCHECK(cond_val.IsRegister()); 3651 if (true_target == nullptr) { 3652 __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); 3653 } else { 3654 __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); 3655 } 3656 } else { 3657 // The condition instruction has not been materialized, use its inputs as 3658 // the comparison and its condition as the branch condition. 3659 HCondition* condition = cond->AsCondition(); 3660 3661 Primitive::Type type = condition->InputAt(0)->GetType(); 3662 if (Primitive::IsFloatingPointType(type)) { 3663 GenerateFcmp(condition); 3664 if (true_target == nullptr) { 3665 IfCondition opposite_condition = condition->GetOppositeCondition(); 3666 __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target); 3667 } else { 3668 __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target); 3669 } 3670 } else { 3671 // Integer cases. 3672 Register lhs = InputRegisterAt(condition, 0); 3673 Operand rhs = InputOperandAt(condition, 1); 3674 3675 Condition arm64_cond; 3676 vixl::aarch64::Label* non_fallthrough_target; 3677 if (true_target == nullptr) { 3678 arm64_cond = ARM64Condition(condition->GetOppositeCondition()); 3679 non_fallthrough_target = false_target; 3680 } else { 3681 arm64_cond = ARM64Condition(condition->GetCondition()); 3682 non_fallthrough_target = true_target; 3683 } 3684 3685 if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) && 3686 rhs.IsImmediate() && (rhs.GetImmediate() == 0)) { 3687 switch (arm64_cond) { 3688 case eq: 3689 __ Cbz(lhs, non_fallthrough_target); 3690 break; 3691 case ne: 3692 __ Cbnz(lhs, non_fallthrough_target); 3693 break; 3694 case lt: 3695 // Test the sign bit and branch accordingly. 3696 __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); 3697 break; 3698 case ge: 3699 // Test the sign bit and branch accordingly. 3700 __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); 3701 break; 3702 default: 3703 // Without the `static_cast` the compiler throws an error for 3704 // `-Werror=sign-promo`. 3705 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); 3706 } 3707 } else { 3708 __ Cmp(lhs, rhs); 3709 __ B(arm64_cond, non_fallthrough_target); 3710 } 3711 } 3712 } 3713 3714 // If neither branch falls through (case 3), the conditional branch to `true_target` 3715 // was already emitted (case 2) and we need to emit a jump to `false_target`. 
3716 if (true_target != nullptr && false_target != nullptr) { 3717 __ B(false_target); 3718 } 3719 } 3720 3721 void LocationsBuilderARM64::VisitIf(HIf* if_instr) { 3722 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); 3723 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 3724 locations->SetInAt(0, Location::RequiresRegister()); 3725 } 3726 } 3727 3728 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { 3729 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 3730 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 3731 vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor); 3732 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) { 3733 true_target = nullptr; 3734 } 3735 vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor); 3736 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) { 3737 false_target = nullptr; 3738 } 3739 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); 3740 } 3741 3742 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 3743 LocationSummary* locations = new (GetGraph()->GetArena()) 3744 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 3745 InvokeRuntimeCallingConvention calling_convention; 3746 RegisterSet caller_saves = RegisterSet::Empty(); 3747 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 3748 locations->SetCustomSlowPathCallerSaves(caller_saves); 3749 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 3750 locations->SetInAt(0, Location::RequiresRegister()); 3751 } 3752 } 3753 3754 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 3755 SlowPathCodeARM64* slow_path = 3756 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); 3757 GenerateTestAndBranch(deoptimize, 3758 /* condition_input_index */ 0, 3759 slow_path->GetEntryLabel(), 3760 /* false_target */ nullptr); 3761 } 3762 3763 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3764 LocationSummary* locations = new (GetGraph()->GetArena()) 3765 LocationSummary(flag, LocationSummary::kNoCall); 3766 locations->SetOut(Location::RequiresRegister()); 3767 } 3768 3769 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3770 __ Ldr(OutputRegister(flag), 3771 MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 3772 } 3773 3774 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { 3775 return condition->IsCondition() && 3776 Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()); 3777 } 3778 3779 static inline Condition GetConditionForSelect(HCondition* condition) { 3780 IfCondition cond = condition->AsCondition()->GetCondition(); 3781 return IsConditionOnFloatingPointValues(condition) ? 
ARM64FPCondition(cond, condition->IsGtBias()) 3782 : ARM64Condition(cond); 3783 } 3784 3785 void LocationsBuilderARM64::VisitSelect(HSelect* select) { 3786 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); 3787 if (Primitive::IsFloatingPointType(select->GetType())) { 3788 locations->SetInAt(0, Location::RequiresFpuRegister()); 3789 locations->SetInAt(1, Location::RequiresFpuRegister()); 3790 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3791 } else { 3792 HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); 3793 HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); 3794 bool is_true_value_constant = cst_true_value != nullptr; 3795 bool is_false_value_constant = cst_false_value != nullptr; 3796 // Ask VIXL whether we should synthesize constants in registers. 3797 // We give an arbitrary register to VIXL when dealing with non-constant inputs. 3798 Operand true_op = is_true_value_constant ? 3799 Operand(Int64FromConstant(cst_true_value)) : Operand(x1); 3800 Operand false_op = is_false_value_constant ? 3801 Operand(Int64FromConstant(cst_false_value)) : Operand(x2); 3802 bool true_value_in_register = false; 3803 bool false_value_in_register = false; 3804 MacroAssembler::GetCselSynthesisInformation( 3805 x0, true_op, false_op, &true_value_in_register, &false_value_in_register); 3806 true_value_in_register |= !is_true_value_constant; 3807 false_value_in_register |= !is_false_value_constant; 3808 3809 locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister() 3810 : Location::ConstantLocation(cst_true_value)); 3811 locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister() 3812 : Location::ConstantLocation(cst_false_value)); 3813 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3814 } 3815 3816 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 3817 locations->SetInAt(2, Location::RequiresRegister()); 3818 } 3819 } 3820 3821 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { 3822 HInstruction* cond = select->GetCondition(); 3823 Condition csel_cond; 3824 3825 if (IsBooleanValueOrMaterializedCondition(cond)) { 3826 if (cond->IsCondition() && cond->GetNext() == select) { 3827 // Use the condition flags set by the previous instruction. 3828 csel_cond = GetConditionForSelect(cond->AsCondition()); 3829 } else { 3830 __ Cmp(InputRegisterAt(select, 2), 0); 3831 csel_cond = ne; 3832 } 3833 } else if (IsConditionOnFloatingPointValues(cond)) { 3834 GenerateFcmp(cond); 3835 csel_cond = GetConditionForSelect(cond->AsCondition()); 3836 } else { 3837 __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1)); 3838 csel_cond = GetConditionForSelect(cond->AsCondition()); 3839 } 3840 3841 if (Primitive::IsFloatingPointType(select->GetType())) { 3842 __ Fcsel(OutputFPRegister(select), 3843 InputFPRegisterAt(select, 1), 3844 InputFPRegisterAt(select, 0), 3845 csel_cond); 3846 } else { 3847 __ Csel(OutputRegister(select), 3848 InputOperandAt(select, 1), 3849 InputOperandAt(select, 0), 3850 csel_cond); 3851 } 3852 } 3853 3854 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 3855 new (GetGraph()->GetArena()) LocationSummary(info); 3856 } 3857 3858 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) { 3859 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 
3860 } 3861 3862 void CodeGeneratorARM64::GenerateNop() { 3863 __ Nop(); 3864 } 3865 3866 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3867 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3868 } 3869 3870 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3871 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3872 } 3873 3874 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3875 HandleFieldSet(instruction); 3876 } 3877 3878 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3879 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 3880 } 3881 3882 // Temp is used for read barrier. 3883 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { 3884 if (kEmitCompilerReadBarrier && 3885 (kUseBakerReadBarrier || 3886 type_check_kind == TypeCheckKind::kAbstractClassCheck || 3887 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 3888 type_check_kind == TypeCheckKind::kArrayObjectCheck)) { 3889 return 1; 3890 } 3891 return 0; 3892 } 3893 3894 // Interface case has 3 temps, one for holding the number of interfaces, one for the current 3895 // interface pointer, one for loading the current interface. 3896 // The other checks have one temp for loading the object's class. 3897 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { 3898 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 3899 return 3; 3900 } 3901 return 1 + NumberOfInstanceOfTemps(type_check_kind); 3902 } 3903 3904 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { 3905 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 3906 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3907 bool baker_read_barrier_slow_path = false; 3908 switch (type_check_kind) { 3909 case TypeCheckKind::kExactCheck: 3910 case TypeCheckKind::kAbstractClassCheck: 3911 case TypeCheckKind::kClassHierarchyCheck: 3912 case TypeCheckKind::kArrayObjectCheck: 3913 call_kind = 3914 kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 3915 baker_read_barrier_slow_path = kUseBakerReadBarrier; 3916 break; 3917 case TypeCheckKind::kArrayCheck: 3918 case TypeCheckKind::kUnresolvedCheck: 3919 case TypeCheckKind::kInterfaceCheck: 3920 call_kind = LocationSummary::kCallOnSlowPath; 3921 break; 3922 } 3923 3924 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); 3925 if (baker_read_barrier_slow_path) { 3926 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 3927 } 3928 locations->SetInAt(0, Location::RequiresRegister()); 3929 locations->SetInAt(1, Location::RequiresRegister()); 3930 // The "out" register is used as a temporary, so it overlaps with the inputs. 3931 // Note that TypeCheckSlowPathARM64 uses this register too. 3932 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 3933 // Add temps if necessary for read barriers. 
3934 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); 3935 } 3936 3937 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { 3938 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3939 LocationSummary* locations = instruction->GetLocations(); 3940 Location obj_loc = locations->InAt(0); 3941 Register obj = InputRegisterAt(instruction, 0); 3942 Register cls = InputRegisterAt(instruction, 1); 3943 Location out_loc = locations->Out(); 3944 Register out = OutputRegister(instruction); 3945 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); 3946 DCHECK_LE(num_temps, 1u); 3947 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); 3948 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3949 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3950 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3951 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 3952 3953 vixl::aarch64::Label done, zero; 3954 SlowPathCodeARM64* slow_path = nullptr; 3955 3956 // Return 0 if `obj` is null. 3957 // Avoid null check if we know `obj` is not null. 3958 if (instruction->MustDoNullCheck()) { 3959 __ Cbz(obj, &zero); 3960 } 3961 3962 switch (type_check_kind) { 3963 case TypeCheckKind::kExactCheck: { 3964 // /* HeapReference<Class> */ out = obj->klass_ 3965 GenerateReferenceLoadTwoRegisters(instruction, 3966 out_loc, 3967 obj_loc, 3968 class_offset, 3969 maybe_temp_loc, 3970 kCompilerReadBarrierOption); 3971 __ Cmp(out, cls); 3972 __ Cset(out, eq); 3973 if (zero.IsLinked()) { 3974 __ B(&done); 3975 } 3976 break; 3977 } 3978 3979 case TypeCheckKind::kAbstractClassCheck: { 3980 // /* HeapReference<Class> */ out = obj->klass_ 3981 GenerateReferenceLoadTwoRegisters(instruction, 3982 out_loc, 3983 obj_loc, 3984 class_offset, 3985 maybe_temp_loc, 3986 kCompilerReadBarrierOption); 3987 // If the class is abstract, we eagerly fetch the super class of the 3988 // object to avoid doing a comparison we know will fail. 3989 vixl::aarch64::Label loop, success; 3990 __ Bind(&loop); 3991 // /* HeapReference<Class> */ out = out->super_class_ 3992 GenerateReferenceLoadOneRegister(instruction, 3993 out_loc, 3994 super_offset, 3995 maybe_temp_loc, 3996 kCompilerReadBarrierOption); 3997 // If `out` is null, we use it for the result, and jump to `done`. 3998 __ Cbz(out, &done); 3999 __ Cmp(out, cls); 4000 __ B(ne, &loop); 4001 __ Mov(out, 1); 4002 if (zero.IsLinked()) { 4003 __ B(&done); 4004 } 4005 break; 4006 } 4007 4008 case TypeCheckKind::kClassHierarchyCheck: { 4009 // /* HeapReference<Class> */ out = obj->klass_ 4010 GenerateReferenceLoadTwoRegisters(instruction, 4011 out_loc, 4012 obj_loc, 4013 class_offset, 4014 maybe_temp_loc, 4015 kCompilerReadBarrierOption); 4016 // Walk over the class hierarchy to find a match. 4017 vixl::aarch64::Label loop, success; 4018 __ Bind(&loop); 4019 __ Cmp(out, cls); 4020 __ B(eq, &success); 4021 // /* HeapReference<Class> */ out = out->super_class_ 4022 GenerateReferenceLoadOneRegister(instruction, 4023 out_loc, 4024 super_offset, 4025 maybe_temp_loc, 4026 kCompilerReadBarrierOption); 4027 __ Cbnz(out, &loop); 4028 // If `out` is null, we use it for the result, and jump to `done`. 
4029 __ B(&done); 4030 __ Bind(&success); 4031 __ Mov(out, 1); 4032 if (zero.IsLinked()) { 4033 __ B(&done); 4034 } 4035 break; 4036 } 4037 4038 case TypeCheckKind::kArrayObjectCheck: { 4039 // /* HeapReference<Class> */ out = obj->klass_ 4040 GenerateReferenceLoadTwoRegisters(instruction, 4041 out_loc, 4042 obj_loc, 4043 class_offset, 4044 maybe_temp_loc, 4045 kCompilerReadBarrierOption); 4046 // Do an exact check. 4047 vixl::aarch64::Label exact_check; 4048 __ Cmp(out, cls); 4049 __ B(eq, &exact_check); 4050 // Otherwise, we need to check that the object's class is a non-primitive array. 4051 // /* HeapReference<Class> */ out = out->component_type_ 4052 GenerateReferenceLoadOneRegister(instruction, 4053 out_loc, 4054 component_offset, 4055 maybe_temp_loc, 4056 kCompilerReadBarrierOption); 4057 // If `out` is null, we use it for the result, and jump to `done`. 4058 __ Cbz(out, &done); 4059 __ Ldrh(out, HeapOperand(out, primitive_offset)); 4060 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 4061 __ Cbnz(out, &zero); 4062 __ Bind(&exact_check); 4063 __ Mov(out, 1); 4064 __ B(&done); 4065 break; 4066 } 4067 4068 case TypeCheckKind::kArrayCheck: { 4069 // No read barrier since the slow path will retry upon failure. 4070 // /* HeapReference<Class> */ out = obj->klass_ 4071 GenerateReferenceLoadTwoRegisters(instruction, 4072 out_loc, 4073 obj_loc, 4074 class_offset, 4075 maybe_temp_loc, 4076 kWithoutReadBarrier); 4077 __ Cmp(out, cls); 4078 DCHECK(locations->OnlyCallsOnSlowPath()); 4079 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, 4080 /* is_fatal */ false); 4081 codegen_->AddSlowPath(slow_path); 4082 __ B(ne, slow_path->GetEntryLabel()); 4083 __ Mov(out, 1); 4084 if (zero.IsLinked()) { 4085 __ B(&done); 4086 } 4087 break; 4088 } 4089 4090 case TypeCheckKind::kUnresolvedCheck: 4091 case TypeCheckKind::kInterfaceCheck: { 4092 // Note that we indeed only call on slow path, but we always go 4093 // into the slow path for the unresolved and interface check 4094 // cases. 4095 // 4096 // We cannot directly call the InstanceofNonTrivial runtime 4097 // entry point without resorting to a type checking slow path 4098 // here (i.e. by calling InvokeRuntime directly), as it would 4099 // require to assign fixed registers for the inputs of this 4100 // HInstanceOf instruction (following the runtime calling 4101 // convention), which might be cluttered by the potential first 4102 // read barrier emission at the beginning of this method. 4103 // 4104 // TODO: Introduce a new runtime entry point taking the object 4105 // to test (instead of its class) as argument, and let it deal 4106 // with the read barrier issues. This will let us refactor this 4107 // case of the `switch` code as it was previously (with a direct 4108 // call to the runtime not using a type checking slow path). 4109 // This should also be beneficial for the other cases above. 
4110 DCHECK(locations->OnlyCallsOnSlowPath()); 4111 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, 4112 /* is_fatal */ false); 4113 codegen_->AddSlowPath(slow_path); 4114 __ B(slow_path->GetEntryLabel()); 4115 if (zero.IsLinked()) { 4116 __ B(&done); 4117 } 4118 break; 4119 } 4120 } 4121 4122 if (zero.IsLinked()) { 4123 __ Bind(&zero); 4124 __ Mov(out, 0); 4125 } 4126 4127 if (done.IsLinked()) { 4128 __ Bind(&done); 4129 } 4130 4131 if (slow_path != nullptr) { 4132 __ Bind(slow_path->GetExitLabel()); 4133 } 4134 } 4135 4136 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { 4137 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 4138 bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); 4139 4140 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 4141 switch (type_check_kind) { 4142 case TypeCheckKind::kExactCheck: 4143 case TypeCheckKind::kAbstractClassCheck: 4144 case TypeCheckKind::kClassHierarchyCheck: 4145 case TypeCheckKind::kArrayObjectCheck: 4146 call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? 4147 LocationSummary::kCallOnSlowPath : 4148 LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 4149 break; 4150 case TypeCheckKind::kArrayCheck: 4151 case TypeCheckKind::kUnresolvedCheck: 4152 case TypeCheckKind::kInterfaceCheck: 4153 call_kind = LocationSummary::kCallOnSlowPath; 4154 break; 4155 } 4156 4157 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); 4158 locations->SetInAt(0, Location::RequiresRegister()); 4159 locations->SetInAt(1, Location::RequiresRegister()); 4160 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 4161 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); 4162 } 4163 4164 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { 4165 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 4166 LocationSummary* locations = instruction->GetLocations(); 4167 Location obj_loc = locations->InAt(0); 4168 Register obj = InputRegisterAt(instruction, 0); 4169 Register cls = InputRegisterAt(instruction, 1); 4170 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); 4171 DCHECK_GE(num_temps, 1u); 4172 DCHECK_LE(num_temps, 3u); 4173 Location temp_loc = locations->GetTemp(0); 4174 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); 4175 Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation(); 4176 Register temp = WRegisterFrom(temp_loc); 4177 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 4178 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 4179 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 4180 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 4181 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 4182 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 4183 const uint32_t object_array_data_offset = 4184 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 4185 4186 bool is_type_check_slow_path_fatal = false; 4187 // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases 4188 // from false negatives. The false negatives may come from avoiding read barriers below. 
Avoiding 4189 // read barriers is done for performance and code size reasons. 4190 if (!kEmitCompilerReadBarrier) { 4191 is_type_check_slow_path_fatal = 4192 (type_check_kind == TypeCheckKind::kExactCheck || 4193 type_check_kind == TypeCheckKind::kAbstractClassCheck || 4194 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 4195 type_check_kind == TypeCheckKind::kArrayObjectCheck) && 4196 !instruction->CanThrowIntoCatchBlock(); 4197 } 4198 SlowPathCodeARM64* type_check_slow_path = 4199 new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, 4200 is_type_check_slow_path_fatal); 4201 codegen_->AddSlowPath(type_check_slow_path); 4202 4203 vixl::aarch64::Label done; 4204 // Avoid null check if we know obj is not null. 4205 if (instruction->MustDoNullCheck()) { 4206 __ Cbz(obj, &done); 4207 } 4208 4209 switch (type_check_kind) { 4210 case TypeCheckKind::kExactCheck: 4211 case TypeCheckKind::kArrayCheck: { 4212 // /* HeapReference<Class> */ temp = obj->klass_ 4213 GenerateReferenceLoadTwoRegisters(instruction, 4214 temp_loc, 4215 obj_loc, 4216 class_offset, 4217 maybe_temp2_loc, 4218 kWithoutReadBarrier); 4219 4220 __ Cmp(temp, cls); 4221 // Jump to slow path for throwing the exception or doing a 4222 // more involved array check. 4223 __ B(ne, type_check_slow_path->GetEntryLabel()); 4224 break; 4225 } 4226 4227 case TypeCheckKind::kAbstractClassCheck: { 4228 // /* HeapReference<Class> */ temp = obj->klass_ 4229 GenerateReferenceLoadTwoRegisters(instruction, 4230 temp_loc, 4231 obj_loc, 4232 class_offset, 4233 maybe_temp2_loc, 4234 kWithoutReadBarrier); 4235 4236 // If the class is abstract, we eagerly fetch the super class of the 4237 // object to avoid doing a comparison we know will fail. 4238 vixl::aarch64::Label loop; 4239 __ Bind(&loop); 4240 // /* HeapReference<Class> */ temp = temp->super_class_ 4241 GenerateReferenceLoadOneRegister(instruction, 4242 temp_loc, 4243 super_offset, 4244 maybe_temp2_loc, 4245 kWithoutReadBarrier); 4246 4247 // If the class reference currently in `temp` is null, jump to the slow path to throw the 4248 // exception. 4249 __ Cbz(temp, type_check_slow_path->GetEntryLabel()); 4250 // Otherwise, compare classes. 4251 __ Cmp(temp, cls); 4252 __ B(ne, &loop); 4253 break; 4254 } 4255 4256 case TypeCheckKind::kClassHierarchyCheck: { 4257 // /* HeapReference<Class> */ temp = obj->klass_ 4258 GenerateReferenceLoadTwoRegisters(instruction, 4259 temp_loc, 4260 obj_loc, 4261 class_offset, 4262 maybe_temp2_loc, 4263 kWithoutReadBarrier); 4264 4265 // Walk over the class hierarchy to find a match. 4266 vixl::aarch64::Label loop; 4267 __ Bind(&loop); 4268 __ Cmp(temp, cls); 4269 __ B(eq, &done); 4270 4271 // /* HeapReference<Class> */ temp = temp->super_class_ 4272 GenerateReferenceLoadOneRegister(instruction, 4273 temp_loc, 4274 super_offset, 4275 maybe_temp2_loc, 4276 kWithoutReadBarrier); 4277 4278 // If the class reference currently in `temp` is not null, jump 4279 // back at the beginning of the loop. 4280 __ Cbnz(temp, &loop); 4281 // Otherwise, jump to the slow path to throw the exception. 4282 __ B(type_check_slow_path->GetEntryLabel()); 4283 break; 4284 } 4285 4286 case TypeCheckKind::kArrayObjectCheck: { 4287 // /* HeapReference<Class> */ temp = obj->klass_ 4288 GenerateReferenceLoadTwoRegisters(instruction, 4289 temp_loc, 4290 obj_loc, 4291 class_offset, 4292 maybe_temp2_loc, 4293 kWithoutReadBarrier); 4294 4295 // Do an exact check. 
4296 __ Cmp(temp, cls); 4297 __ B(eq, &done); 4298 4299 // Otherwise, we need to check that the object's class is a non-primitive array. 4300 // /* HeapReference<Class> */ temp = temp->component_type_ 4301 GenerateReferenceLoadOneRegister(instruction, 4302 temp_loc, 4303 component_offset, 4304 maybe_temp2_loc, 4305 kWithoutReadBarrier); 4306 4307 // If the component type is null, jump to the slow path to throw the exception. 4308 __ Cbz(temp, type_check_slow_path->GetEntryLabel()); 4309 // Otherwise, the object is indeed an array. Further check that this component type is not a 4310 // primitive type. 4311 __ Ldrh(temp, HeapOperand(temp, primitive_offset)); 4312 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 4313 __ Cbnz(temp, type_check_slow_path->GetEntryLabel()); 4314 break; 4315 } 4316 4317 case TypeCheckKind::kUnresolvedCheck: 4318 // We always go into the type check slow path for the unresolved check cases. 4319 // 4320 // We cannot directly call the CheckCast runtime entry point 4321 // without resorting to a type checking slow path here (i.e. by 4322 // calling InvokeRuntime directly), as it would require to 4323 // assign fixed registers for the inputs of this HInstanceOf 4324 // instruction (following the runtime calling convention), which 4325 // might be cluttered by the potential first read barrier 4326 // emission at the beginning of this method. 4327 __ B(type_check_slow_path->GetEntryLabel()); 4328 break; 4329 case TypeCheckKind::kInterfaceCheck: { 4330 // /* HeapReference<Class> */ temp = obj->klass_ 4331 GenerateReferenceLoadTwoRegisters(instruction, 4332 temp_loc, 4333 obj_loc, 4334 class_offset, 4335 maybe_temp2_loc, 4336 kWithoutReadBarrier); 4337 4338 // /* HeapReference<Class> */ temp = temp->iftable_ 4339 GenerateReferenceLoadTwoRegisters(instruction, 4340 temp_loc, 4341 temp_loc, 4342 iftable_offset, 4343 maybe_temp2_loc, 4344 kWithoutReadBarrier); 4345 // Iftable is never null. 4346 __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset)); 4347 // Loop through the iftable and check if any class matches. 4348 vixl::aarch64::Label start_loop; 4349 __ Bind(&start_loop); 4350 __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel()); 4351 __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset)); 4352 GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc)); 4353 // Go to next interface. 4354 __ Add(temp, temp, 2 * kHeapReferenceSize); 4355 __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2); 4356 // Compare the classes and continue the loop if they do not match. 4357 __ Cmp(cls, WRegisterFrom(maybe_temp3_loc)); 4358 __ B(ne, &start_loop); 4359 break; 4360 } 4361 } 4362 __ Bind(&done); 4363 4364 __ Bind(type_check_slow_path->GetExitLabel()); 4365 } 4366 4367 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { 4368 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); 4369 locations->SetOut(Location::ConstantLocation(constant)); 4370 } 4371 4372 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 4373 // Will be generated at use site. 
4374 } 4375 4376 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { 4377 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); 4378 locations->SetOut(Location::ConstantLocation(constant)); 4379 } 4380 4381 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 4382 // Will be generated at use site. 4383 } 4384 4385 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 4386 // The trampoline uses the same calling convention as dex calling conventions, 4387 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 4388 // the method_idx. 4389 HandleInvoke(invoke); 4390 } 4391 4392 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 4393 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 4394 } 4395 4396 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { 4397 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; 4398 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 4399 } 4400 4401 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { 4402 HandleInvoke(invoke); 4403 } 4404 4405 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { 4406 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 4407 LocationSummary* locations = invoke->GetLocations(); 4408 Register temp = XRegisterFrom(locations->GetTemp(0)); 4409 Location receiver = locations->InAt(0); 4410 Offset class_offset = mirror::Object::ClassOffset(); 4411 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); 4412 4413 // The register ip1 is required to be used for the hidden argument in 4414 // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. 4415 MacroAssembler* masm = GetVIXLAssembler(); 4416 UseScratchRegisterScope scratch_scope(masm); 4417 scratch_scope.Exclude(ip1); 4418 __ Mov(ip1, invoke->GetDexMethodIndex()); 4419 4420 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 4421 if (receiver.IsStackSlot()) { 4422 __ Ldr(temp.W(), StackOperandFrom(receiver)); 4423 { 4424 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 4425 // /* HeapReference<Class> */ temp = temp->klass_ 4426 __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); 4427 codegen_->MaybeRecordImplicitNullCheck(invoke); 4428 } 4429 } else { 4430 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 4431 // /* HeapReference<Class> */ temp = receiver->klass_ 4432 __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); 4433 codegen_->MaybeRecordImplicitNullCheck(invoke); 4434 } 4435 4436 // Instead of simply (possibly) unpoisoning `temp` here, we should 4437 // emit a read barrier for the previous class reference load. 4438 // However this is not required in practice, as this is an 4439 // intermediate/temporary reference and because the current 4440 // concurrent copying collector keeps the from-space memory 4441 // intact/accessible until the end of the marking phase (the 4442 // concurrent copying collector may not in the future). 
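// Illustrative note: with `kPoisonHeapReferences` enabled, heap references are stored in a
// poisoned (negated) form, so the unpoisoning below is expected to amount to a single
//   neg wTemp, wTemp
// on the 32-bit register; with poisoning disabled, MaybeUnpoisonHeapReference() emits nothing.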
4443 GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); 4444 __ Ldr(temp, 4445 MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); 4446 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 4447 invoke->GetImtIndex(), kArm64PointerSize)); 4448 // temp = temp->GetImtEntryAt(method_offset); 4449 __ Ldr(temp, MemOperand(temp, method_offset)); 4450 // lr = temp->GetEntryPoint(); 4451 __ Ldr(lr, MemOperand(temp, entry_point.Int32Value())); 4452 4453 { 4454 // Ensure the pc position is recorded immediately after the `blr` instruction. 4455 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 4456 4457 // lr(); 4458 __ blr(lr); 4459 DCHECK(!codegen_->IsLeafMethod()); 4460 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 4461 } 4462 } 4463 4464 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 4465 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_); 4466 if (intrinsic.TryDispatch(invoke)) { 4467 return; 4468 } 4469 4470 HandleInvoke(invoke); 4471 } 4472 4473 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 4474 // Explicit clinit checks triggered by static invokes must have been pruned by 4475 // art::PrepareForRegisterAllocation. 4476 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 4477 4478 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_); 4479 if (intrinsic.TryDispatch(invoke)) { 4480 return; 4481 } 4482 4483 HandleInvoke(invoke); 4484 } 4485 4486 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) { 4487 if (invoke->GetLocations()->Intrinsified()) { 4488 IntrinsicCodeGeneratorARM64 intrinsic(codegen); 4489 intrinsic.Dispatch(invoke); 4490 return true; 4491 } 4492 return false; 4493 } 4494 4495 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( 4496 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 4497 HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { 4498 // On ARM64 we support all dispatch types. 4499 return desired_dispatch_info; 4500 } 4501 4502 void CodeGeneratorARM64::GenerateStaticOrDirectCall( 4503 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { 4504 // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. 4505 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 4506 switch (invoke->GetMethodLoadKind()) { 4507 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { 4508 uint32_t offset = 4509 GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); 4510 // temp = thread->string_init_entrypoint 4511 __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset)); 4512 break; 4513 } 4514 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: 4515 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); 4516 break; 4517 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { 4518 DCHECK(GetCompilerOptions().IsBootImage()); 4519 // Add ADRP with its PC-relative method patch. 4520 vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); 4521 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4522 // Add ADD with its PC-relative method patch. 
4523 vixl::aarch64::Label* add_label = 4524 NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label); 4525 EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); 4526 break; 4527 } 4528 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: 4529 // Load method address from literal pool. 4530 __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); 4531 break; 4532 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { 4533 // Add ADRP with its PC-relative DexCache access patch. 4534 MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); 4535 vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); 4536 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4537 // Add LDR with its PC-relative DexCache access patch. 4538 vixl::aarch64::Label* ldr_label = 4539 NewMethodBssEntryPatch(target_method, adrp_label); 4540 EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); 4541 break; 4542 } 4543 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { 4544 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); 4545 return; // No code pointer retrieval; the runtime performs the call directly. 4546 } 4547 } 4548 4549 switch (invoke->GetCodePtrLocation()) { 4550 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: 4551 { 4552 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 4553 ExactAssemblyScope eas(GetVIXLAssembler(), 4554 kInstructionSize, 4555 CodeBufferCheckScope::kExactSize); 4556 __ bl(&frame_entry_label_); 4557 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4558 } 4559 break; 4560 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: 4561 // LR = callee_method->entry_point_from_quick_compiled_code_; 4562 __ Ldr(lr, MemOperand( 4563 XRegisterFrom(callee_method), 4564 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value())); 4565 { 4566 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 4567 ExactAssemblyScope eas(GetVIXLAssembler(), 4568 kInstructionSize, 4569 CodeBufferCheckScope::kExactSize); 4570 // lr() 4571 __ blr(lr); 4572 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4573 } 4574 break; 4575 } 4576 4577 DCHECK(!IsLeafMethod()); 4578 } 4579 4580 void CodeGeneratorARM64::GenerateVirtualCall( 4581 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { 4582 // Use the calling convention instead of the location of the receiver, as 4583 // intrinsics may have put the receiver in a different register. In the intrinsics 4584 // slow path, the arguments have been moved to the right place, so here we are 4585 // guaranteed that the receiver is the first register of the calling convention. 4586 InvokeDexCallingConvention calling_convention; 4587 Register receiver = calling_convention.GetRegisterAt(0); 4588 Register temp = XRegisterFrom(temp_in); 4589 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 4590 invoke->GetVTableIndex(), kArm64PointerSize).SizeValue(); 4591 Offset class_offset = mirror::Object::ClassOffset(); 4592 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); 4593 4594 DCHECK(receiver.IsRegister()); 4595 4596 { 4597 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
4598 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4599 // /* HeapReference<Class> */ temp = receiver->klass_
4600 __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4601 MaybeRecordImplicitNullCheck(invoke);
4602 }
4603 // Instead of simply (possibly) unpoisoning `temp` here, we should
4604 // emit a read barrier for the previous class reference load. However this is not required
4605 // in practice, as this is an intermediate/temporary reference and because the current
4606 // concurrent copying collector keeps the from-space memory
4607 // intact/accessible until the end of the marking phase (the
4608 // concurrent copying collector may not in the future).
4609 GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4610 // temp = temp->GetMethodAt(method_offset);
4611 __ Ldr(temp, MemOperand(temp, method_offset));
4612 // lr = temp->GetEntryPoint();
4613 __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4614 {
4615 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4616 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4617 // lr();
4618 __ blr(lr);
4619 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4620 }
4621 }
4622
4623 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4624 HandleInvoke(invoke);
4625 }
4626
4627 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4628 codegen_->GenerateInvokePolymorphicCall(invoke);
4629 }
4630
4631 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
4632 MethodReference target_method,
4633 vixl::aarch64::Label* adrp_label) {
4634 return NewPcRelativePatch(*target_method.dex_file,
4635 target_method.dex_method_index,
4636 adrp_label,
4637 &pc_relative_method_patches_);
4638 }
4639
4640 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
4641 MethodReference target_method,
4642 vixl::aarch64::Label* adrp_label) {
4643 return NewPcRelativePatch(*target_method.dex_file,
4644 target_method.dex_method_index,
4645 adrp_label,
4646 &method_bss_entry_patches_);
4647 }
4648
4649 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
4650 const DexFile& dex_file,
4651 dex::TypeIndex type_index,
4652 vixl::aarch64::Label* adrp_label) {
4653 return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_);
4654 }
4655
4656 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4657 const DexFile& dex_file,
4658 dex::TypeIndex type_index,
4659 vixl::aarch64::Label* adrp_label) {
4660 return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4661 }
4662
4663 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
4664 const DexFile& dex_file,
4665 dex::StringIndex string_index,
4666 vixl::aarch64::Label* adrp_label) {
4667 return
4668 NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
4669 }
4670
4671 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
4672 baker_read_barrier_patches_.emplace_back(custom_data);
4673 return &baker_read_barrier_patches_.back().label;
4674 }
4675
4676 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4677 const DexFile& dex_file,
4678 uint32_t offset_or_index,
4679 vixl::aarch64::Label* adrp_label,
4680 ArenaDeque<PcRelativePatchInfo>* patches) {
4681 // Add a patch entry and return the label.
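// Illustrative usage sketch, mirroring the call sites above (e.g. GenerateStaticOrDirectCall):
// the first call passes a null `adrp_label` and returns the label of the ADRP placeholder;
// the second call passes that label back so the linker can pair the ADD/LDR with its ADRP:
//
//   vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(target_method);
//   EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
//   vixl::aarch64::Label* add_label = NewPcRelativeMethodPatch(target_method, adrp_label);
//   EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));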
4682 patches->emplace_back(dex_file, offset_or_index); 4683 PcRelativePatchInfo* info = &patches->back(); 4684 vixl::aarch64::Label* label = &info->label; 4685 // If adrp_label is null, this is the ADRP patch and needs to point to its own label. 4686 info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label; 4687 return label; 4688 } 4689 4690 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( 4691 uint64_t address) { 4692 return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); 4693 } 4694 4695 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( 4696 const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { 4697 jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), 4698 reinterpret_cast64<uint64_t>(handle.GetReference())); 4699 return jit_string_patches_.GetOrCreate( 4700 StringReference(&dex_file, string_index), 4701 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); 4702 } 4703 4704 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( 4705 const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { 4706 jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index), 4707 reinterpret_cast64<uint64_t>(handle.GetReference())); 4708 return jit_class_patches_.GetOrCreate( 4709 TypeReference(&dex_file, type_index), 4710 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); 4711 } 4712 4713 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, 4714 vixl::aarch64::Register reg) { 4715 DCHECK(reg.IsX()); 4716 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4717 __ Bind(fixup_label); 4718 __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0)); 4719 } 4720 4721 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, 4722 vixl::aarch64::Register out, 4723 vixl::aarch64::Register base) { 4724 DCHECK(out.IsX()); 4725 DCHECK(base.IsX()); 4726 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4727 __ Bind(fixup_label); 4728 __ add(out, base, Operand(/* offset placeholder */ 0)); 4729 } 4730 4731 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, 4732 vixl::aarch64::Register out, 4733 vixl::aarch64::Register base) { 4734 DCHECK(base.IsX()); 4735 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4736 __ Bind(fixup_label); 4737 __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); 4738 } 4739 4740 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> 4741 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( 4742 const ArenaDeque<PcRelativePatchInfo>& infos, 4743 ArenaVector<LinkerPatch>* linker_patches) { 4744 for (const PcRelativePatchInfo& info : infos) { 4745 linker_patches->push_back(Factory(info.label.GetLocation(), 4746 &info.target_dex_file, 4747 info.pc_insn_label->GetLocation(), 4748 info.offset_or_index)); 4749 } 4750 } 4751 4752 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { 4753 DCHECK(linker_patches->empty()); 4754 size_t size = 4755 pc_relative_method_patches_.size() + 4756 method_bss_entry_patches_.size() + 4757 pc_relative_type_patches_.size() + 4758 type_bss_entry_patches_.size() + 4759 pc_relative_string_patches_.size() + 4760 baker_read_barrier_patches_.size(); 4761 linker_patches->reserve(size); 4762 if (GetCompilerOptions().IsBootImage()) { 4763 
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, 4764 linker_patches); 4765 EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, 4766 linker_patches); 4767 EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, 4768 linker_patches); 4769 } else { 4770 DCHECK(pc_relative_method_patches_.empty()); 4771 DCHECK(pc_relative_type_patches_.empty()); 4772 EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, 4773 linker_patches); 4774 } 4775 EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, 4776 linker_patches); 4777 EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, 4778 linker_patches); 4779 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { 4780 linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), 4781 info.custom_data)); 4782 } 4783 DCHECK_EQ(size, linker_patches->size()); 4784 } 4785 4786 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { 4787 return uint32_literals_.GetOrCreate( 4788 value, 4789 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); }); 4790 } 4791 4792 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) { 4793 return uint64_literals_.GetOrCreate( 4794 value, 4795 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); }); 4796 } 4797 4798 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 4799 // Explicit clinit checks triggered by static invokes must have been pruned by 4800 // art::PrepareForRegisterAllocation. 4801 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 4802 4803 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 4804 return; 4805 } 4806 4807 // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there 4808 // are no pools emitted. 4809 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 4810 LocationSummary* locations = invoke->GetLocations(); 4811 codegen_->GenerateStaticOrDirectCall( 4812 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 4813 } 4814 4815 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 4816 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 4817 return; 4818 } 4819 4820 // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there 4821 // are no pools emitted. 
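// Illustrative note: the EmissionCheckScope below is assumed to reserve up to
// kInvokeCodeMarginSizeInBytes of buffer space and to keep VIXL from emitting literal pools
// within the scope, so no pool can separate the BLR from the RecordPcInfo() that follows it.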
4822 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 4823 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 4824 DCHECK(!codegen_->IsLeafMethod()); 4825 } 4826 4827 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( 4828 HLoadClass::LoadKind desired_class_load_kind) { 4829 switch (desired_class_load_kind) { 4830 case HLoadClass::LoadKind::kInvalid: 4831 LOG(FATAL) << "UNREACHABLE"; 4832 UNREACHABLE(); 4833 case HLoadClass::LoadKind::kReferrersClass: 4834 break; 4835 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 4836 case HLoadClass::LoadKind::kBssEntry: 4837 DCHECK(!Runtime::Current()->UseJitCompilation()); 4838 break; 4839 case HLoadClass::LoadKind::kJitTableAddress: 4840 DCHECK(Runtime::Current()->UseJitCompilation()); 4841 break; 4842 case HLoadClass::LoadKind::kBootImageAddress: 4843 case HLoadClass::LoadKind::kRuntimeCall: 4844 break; 4845 } 4846 return desired_class_load_kind; 4847 } 4848 4849 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { 4850 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 4851 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 4852 InvokeRuntimeCallingConvention calling_convention; 4853 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 4854 cls, 4855 LocationFrom(calling_convention.GetRegisterAt(0)), 4856 LocationFrom(vixl::aarch64::x0)); 4857 DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0)); 4858 return; 4859 } 4860 DCHECK(!cls->NeedsAccessCheck()); 4861 4862 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 4863 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 4864 ? LocationSummary::kCallOnSlowPath 4865 : LocationSummary::kNoCall; 4866 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); 4867 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 4868 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 4869 } 4870 4871 if (load_kind == HLoadClass::LoadKind::kReferrersClass) { 4872 locations->SetInAt(0, Location::RequiresRegister()); 4873 } 4874 locations->SetOut(Location::RequiresRegister()); 4875 if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { 4876 if (!kUseReadBarrier || kUseBakerReadBarrier) { 4877 // Rely on the type resolution or initialization and marking to save everything we need. 4878 locations->AddTemp(FixedTempLocation()); 4879 RegisterSet caller_saves = RegisterSet::Empty(); 4880 InvokeRuntimeCallingConvention calling_convention; 4881 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 4882 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), 4883 RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot), 4884 Primitive::kPrimNot).GetCode()); 4885 locations->SetCustomSlowPathCallerSaves(caller_saves); 4886 } else { 4887 // For non-Baker read barrier we have a temp-clobbering call. 4888 } 4889 } 4890 } 4891 4892 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 4893 // move. 
4894 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { 4895 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 4896 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 4897 codegen_->GenerateLoadClassRuntimeCall(cls); 4898 return; 4899 } 4900 DCHECK(!cls->NeedsAccessCheck()); 4901 4902 Location out_loc = cls->GetLocations()->Out(); 4903 Register out = OutputRegister(cls); 4904 Register bss_entry_temp; 4905 vixl::aarch64::Label* bss_entry_adrp_label = nullptr; 4906 4907 const ReadBarrierOption read_barrier_option = cls->IsInBootImage() 4908 ? kWithoutReadBarrier 4909 : kCompilerReadBarrierOption; 4910 bool generate_null_check = false; 4911 switch (load_kind) { 4912 case HLoadClass::LoadKind::kReferrersClass: { 4913 DCHECK(!cls->CanCallRuntime()); 4914 DCHECK(!cls->MustGenerateClinitCheck()); 4915 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 4916 Register current_method = InputRegisterAt(cls, 0); 4917 GenerateGcRootFieldLoad(cls, 4918 out_loc, 4919 current_method, 4920 ArtMethod::DeclaringClassOffset().Int32Value(), 4921 /* fixup_label */ nullptr, 4922 read_barrier_option); 4923 break; 4924 } 4925 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { 4926 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 4927 // Add ADRP with its PC-relative type patch. 4928 const DexFile& dex_file = cls->GetDexFile(); 4929 dex::TypeIndex type_index = cls->GetTypeIndex(); 4930 vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); 4931 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4932 // Add ADD with its PC-relative type patch. 4933 vixl::aarch64::Label* add_label = 4934 codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); 4935 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); 4936 break; 4937 } 4938 case HLoadClass::LoadKind::kBootImageAddress: { 4939 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 4940 uint32_t address = dchecked_integral_cast<uint32_t>( 4941 reinterpret_cast<uintptr_t>(cls->GetClass().Get())); 4942 DCHECK_NE(address, 0u); 4943 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); 4944 break; 4945 } 4946 case HLoadClass::LoadKind::kBssEntry: { 4947 // Add ADRP with its PC-relative Class .bss entry patch. 4948 const DexFile& dex_file = cls->GetDexFile(); 4949 dex::TypeIndex type_index = cls->GetTypeIndex(); 4950 bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0)); 4951 bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); 4952 codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp); 4953 // Add LDR with its PC-relative Class patch. 
4954 vixl::aarch64::Label* ldr_label = 4955 codegen_->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label); 4956 // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ 4957 GenerateGcRootFieldLoad(cls, 4958 out_loc, 4959 bss_entry_temp, 4960 /* offset placeholder */ 0u, 4961 ldr_label, 4962 read_barrier_option); 4963 generate_null_check = true; 4964 break; 4965 } 4966 case HLoadClass::LoadKind::kJitTableAddress: { 4967 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), 4968 cls->GetTypeIndex(), 4969 cls->GetClass())); 4970 GenerateGcRootFieldLoad(cls, 4971 out_loc, 4972 out.X(), 4973 /* offset */ 0, 4974 /* fixup_label */ nullptr, 4975 read_barrier_option); 4976 break; 4977 } 4978 case HLoadClass::LoadKind::kRuntimeCall: 4979 case HLoadClass::LoadKind::kInvalid: 4980 LOG(FATAL) << "UNREACHABLE"; 4981 UNREACHABLE(); 4982 } 4983 4984 bool do_clinit = cls->MustGenerateClinitCheck(); 4985 if (generate_null_check || do_clinit) { 4986 DCHECK(cls->CanCallRuntime()); 4987 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( 4988 cls, cls, cls->GetDexPc(), do_clinit, bss_entry_temp, bss_entry_adrp_label); 4989 codegen_->AddSlowPath(slow_path); 4990 if (generate_null_check) { 4991 __ Cbz(out, slow_path->GetEntryLabel()); 4992 } 4993 if (cls->MustGenerateClinitCheck()) { 4994 GenerateClassInitializationCheck(slow_path, out); 4995 } else { 4996 __ Bind(slow_path->GetExitLabel()); 4997 } 4998 } 4999 } 5000 5001 static MemOperand GetExceptionTlsAddress() { 5002 return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); 5003 } 5004 5005 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) { 5006 LocationSummary* locations = 5007 new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); 5008 locations->SetOut(Location::RequiresRegister()); 5009 } 5010 5011 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) { 5012 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress()); 5013 } 5014 5015 void LocationsBuilderARM64::VisitClearException(HClearException* clear) { 5016 new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); 5017 } 5018 5019 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 5020 __ Str(wzr, GetExceptionTlsAddress()); 5021 } 5022 5023 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( 5024 HLoadString::LoadKind desired_string_load_kind) { 5025 switch (desired_string_load_kind) { 5026 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 5027 case HLoadString::LoadKind::kBssEntry: 5028 DCHECK(!Runtime::Current()->UseJitCompilation()); 5029 break; 5030 case HLoadString::LoadKind::kJitTableAddress: 5031 DCHECK(Runtime::Current()->UseJitCompilation()); 5032 break; 5033 case HLoadString::LoadKind::kBootImageAddress: 5034 case HLoadString::LoadKind::kRuntimeCall: 5035 break; 5036 } 5037 return desired_string_load_kind; 5038 } 5039 5040 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { 5041 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 5042 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); 5043 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { 5044 InvokeRuntimeCallingConvention calling_convention; 5045 locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); 5046 } else { 5047 
locations->SetOut(Location::RequiresRegister()); 5048 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { 5049 if (!kUseReadBarrier || kUseBakerReadBarrier) { 5050 // Rely on the pResolveString and marking to save everything we need. 5051 locations->AddTemp(FixedTempLocation()); 5052 RegisterSet caller_saves = RegisterSet::Empty(); 5053 InvokeRuntimeCallingConvention calling_convention; 5054 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 5055 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), 5056 RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot), 5057 Primitive::kPrimNot).GetCode()); 5058 locations->SetCustomSlowPathCallerSaves(caller_saves); 5059 } else { 5060 // For non-Baker read barrier we have a temp-clobbering call. 5061 } 5062 } 5063 } 5064 } 5065 5066 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 5067 // move. 5068 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { 5069 Register out = OutputRegister(load); 5070 Location out_loc = load->GetLocations()->Out(); 5071 5072 switch (load->GetLoadKind()) { 5073 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { 5074 // Add ADRP with its PC-relative String patch. 5075 const DexFile& dex_file = load->GetDexFile(); 5076 const dex::StringIndex string_index = load->GetStringIndex(); 5077 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 5078 vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); 5079 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 5080 // Add ADD with its PC-relative String patch. 5081 vixl::aarch64::Label* add_label = 5082 codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); 5083 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); 5084 return; // No dex cache slow path. 5085 } 5086 case HLoadString::LoadKind::kBootImageAddress: { 5087 uint32_t address = dchecked_integral_cast<uint32_t>( 5088 reinterpret_cast<uintptr_t>(load->GetString().Get())); 5089 DCHECK_NE(address, 0u); 5090 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); 5091 return; // No dex cache slow path. 5092 } 5093 case HLoadString::LoadKind::kBssEntry: { 5094 // Add ADRP with its PC-relative String .bss entry patch. 5095 const DexFile& dex_file = load->GetDexFile(); 5096 const dex::StringIndex string_index = load->GetStringIndex(); 5097 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 5098 Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0)); 5099 vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); 5100 codegen_->EmitAdrpPlaceholder(adrp_label, temp); 5101 // Add LDR with its PC-relative String patch. 
5102 vixl::aarch64::Label* ldr_label = 5103 codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); 5104 // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ 5105 GenerateGcRootFieldLoad(load, 5106 out_loc, 5107 temp, 5108 /* offset placeholder */ 0u, 5109 ldr_label, 5110 kCompilerReadBarrierOption); 5111 SlowPathCodeARM64* slow_path = 5112 new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label); 5113 codegen_->AddSlowPath(slow_path); 5114 __ Cbz(out.X(), slow_path->GetEntryLabel()); 5115 __ Bind(slow_path->GetExitLabel()); 5116 return; 5117 } 5118 case HLoadString::LoadKind::kJitTableAddress: { 5119 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), 5120 load->GetStringIndex(), 5121 load->GetString())); 5122 GenerateGcRootFieldLoad(load, 5123 out_loc, 5124 out.X(), 5125 /* offset */ 0, 5126 /* fixup_label */ nullptr, 5127 kCompilerReadBarrierOption); 5128 return; 5129 } 5130 default: 5131 break; 5132 } 5133 5134 // TODO: Re-add the compiler code to do string dex cache lookup again. 5135 InvokeRuntimeCallingConvention calling_convention; 5136 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); 5137 __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); 5138 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); 5139 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 5140 } 5141 5142 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { 5143 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); 5144 locations->SetOut(Location::ConstantLocation(constant)); 5145 } 5146 5147 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 5148 // Will be generated at use site. 5149 } 5150 5151 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 5152 LocationSummary* locations = 5153 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); 5154 InvokeRuntimeCallingConvention calling_convention; 5155 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5156 } 5157 5158 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 5159 codegen_->InvokeRuntime(instruction->IsEnter() ? 
kQuickLockObject : kQuickUnlockObject, 5160 instruction, 5161 instruction->GetDexPc()); 5162 if (instruction->IsEnter()) { 5163 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 5164 } else { 5165 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 5166 } 5167 } 5168 5169 void LocationsBuilderARM64::VisitMul(HMul* mul) { 5170 LocationSummary* locations = 5171 new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); 5172 switch (mul->GetResultType()) { 5173 case Primitive::kPrimInt: 5174 case Primitive::kPrimLong: 5175 locations->SetInAt(0, Location::RequiresRegister()); 5176 locations->SetInAt(1, Location::RequiresRegister()); 5177 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5178 break; 5179 5180 case Primitive::kPrimFloat: 5181 case Primitive::kPrimDouble: 5182 locations->SetInAt(0, Location::RequiresFpuRegister()); 5183 locations->SetInAt(1, Location::RequiresFpuRegister()); 5184 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5185 break; 5186 5187 default: 5188 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 5189 } 5190 } 5191 5192 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) { 5193 switch (mul->GetResultType()) { 5194 case Primitive::kPrimInt: 5195 case Primitive::kPrimLong: 5196 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1)); 5197 break; 5198 5199 case Primitive::kPrimFloat: 5200 case Primitive::kPrimDouble: 5201 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1)); 5202 break; 5203 5204 default: 5205 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 5206 } 5207 } 5208 5209 void LocationsBuilderARM64::VisitNeg(HNeg* neg) { 5210 LocationSummary* locations = 5211 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); 5212 switch (neg->GetResultType()) { 5213 case Primitive::kPrimInt: 5214 case Primitive::kPrimLong: 5215 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg)); 5216 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5217 break; 5218 5219 case Primitive::kPrimFloat: 5220 case Primitive::kPrimDouble: 5221 locations->SetInAt(0, Location::RequiresFpuRegister()); 5222 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5223 break; 5224 5225 default: 5226 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 5227 } 5228 } 5229 5230 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) { 5231 switch (neg->GetResultType()) { 5232 case Primitive::kPrimInt: 5233 case Primitive::kPrimLong: 5234 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0)); 5235 break; 5236 5237 case Primitive::kPrimFloat: 5238 case Primitive::kPrimDouble: 5239 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0)); 5240 break; 5241 5242 default: 5243 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 5244 } 5245 } 5246 5247 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { 5248 LocationSummary* locations = 5249 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); 5250 InvokeRuntimeCallingConvention calling_convention; 5251 locations->SetOut(LocationFrom(x0)); 5252 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5253 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 5254 } 5255 5256 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { 5257 // Note: if 
heap poisoning is enabled, the entry point takes care
5258 // of poisoning the reference.
5259 QuickEntrypointEnum entrypoint =
5260 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
5261 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5262 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5263 }
5264
5265 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5266 LocationSummary* locations =
5267 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5268 InvokeRuntimeCallingConvention calling_convention;
5269 if (instruction->IsStringAlloc()) {
5270 locations->AddTemp(LocationFrom(kArtMethodRegister));
5271 } else {
5272 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5273 }
5274 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
5275 }
5276
5277 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5278 // Note: if heap poisoning is enabled, the entry point takes care
5279 // of poisoning the reference.
5280 if (instruction->IsStringAlloc()) {
5281 // String is allocated through StringFactory. Call NewEmptyString entry point.
5282 Location temp = instruction->GetLocations()->GetTemp(0);
5283 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5284 __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
5285 __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
5286
5287 {
5288 // Ensure the pc position is recorded immediately after the `blr` instruction.
5289 ExactAssemblyScope eas(GetVIXLAssembler(),
5290 kInstructionSize,
5291 CodeBufferCheckScope::kExactSize);
5292 __ blr(lr);
5293 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
5294 }
5295 } else {
5296 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5297 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5298 }
5299 }
5300
5301 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5302 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5303 locations->SetInAt(0, Location::RequiresRegister());
5304 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5305 }
5306
5307 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5308 switch (instruction->GetResultType()) {
5309 case Primitive::kPrimInt:
5310 case Primitive::kPrimLong:
5311 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5312 break;
5313
5314 default:
5315 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5316 }
5317 }
5318
5319 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5320 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5321 locations->SetInAt(0, Location::RequiresRegister());
5322 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5323 }
5324
5325 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5326 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5327 }
5328
5329 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5330 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5331 locations->SetInAt(0,
Location::RequiresRegister()); 5332 } 5333 5334 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { 5335 if (CanMoveNullCheckToUser(instruction)) { 5336 return; 5337 } 5338 { 5339 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 5340 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 5341 Location obj = instruction->GetLocations()->InAt(0); 5342 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); 5343 RecordPcInfo(instruction, instruction->GetDexPc()); 5344 } 5345 } 5346 5347 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) { 5348 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction); 5349 AddSlowPath(slow_path); 5350 5351 LocationSummary* locations = instruction->GetLocations(); 5352 Location obj = locations->InAt(0); 5353 5354 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); 5355 } 5356 5357 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { 5358 codegen_->GenerateNullCheck(instruction); 5359 } 5360 5361 void LocationsBuilderARM64::VisitOr(HOr* instruction) { 5362 HandleBinaryOp(instruction); 5363 } 5364 5365 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { 5366 HandleBinaryOp(instruction); 5367 } 5368 5369 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 5370 LOG(FATAL) << "Unreachable"; 5371 } 5372 5373 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) { 5374 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 5375 } 5376 5377 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { 5378 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 5379 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 5380 if (location.IsStackSlot()) { 5381 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5382 } else if (location.IsDoubleStackSlot()) { 5383 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5384 } 5385 locations->SetOut(location); 5386 } 5387 5388 void InstructionCodeGeneratorARM64::VisitParameterValue( 5389 HParameterValue* instruction ATTRIBUTE_UNUSED) { 5390 // Nothing to do, the parameter is already at its location. 5391 } 5392 5393 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { 5394 LocationSummary* locations = 5395 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); 5396 locations->SetOut(LocationFrom(kArtMethodRegister)); 5397 } 5398 5399 void InstructionCodeGeneratorARM64::VisitCurrentMethod( 5400 HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 5401 // Nothing to do, the method is already at its location. 
5402 } 5403 5404 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) { 5405 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 5406 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { 5407 locations->SetInAt(i, Location::Any()); 5408 } 5409 locations->SetOut(Location::Any()); 5410 } 5411 5412 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 5413 LOG(FATAL) << "Unreachable"; 5414 } 5415 5416 void LocationsBuilderARM64::VisitRem(HRem* rem) { 5417 Primitive::Type type = rem->GetResultType(); 5418 LocationSummary::CallKind call_kind = 5419 Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly 5420 : LocationSummary::kNoCall; 5421 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); 5422 5423 switch (type) { 5424 case Primitive::kPrimInt: 5425 case Primitive::kPrimLong: 5426 locations->SetInAt(0, Location::RequiresRegister()); 5427 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); 5428 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5429 break; 5430 5431 case Primitive::kPrimFloat: 5432 case Primitive::kPrimDouble: { 5433 InvokeRuntimeCallingConvention calling_convention; 5434 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 5435 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); 5436 locations->SetOut(calling_convention.GetReturnLocation(type)); 5437 5438 break; 5439 } 5440 5441 default: 5442 LOG(FATAL) << "Unexpected rem type " << type; 5443 } 5444 } 5445 5446 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { 5447 Primitive::Type type = rem->GetResultType(); 5448 5449 switch (type) { 5450 case Primitive::kPrimInt: 5451 case Primitive::kPrimLong: { 5452 GenerateDivRemIntegral(rem); 5453 break; 5454 } 5455 5456 case Primitive::kPrimFloat: 5457 case Primitive::kPrimDouble: { 5458 QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? 
kQuickFmodf : kQuickFmod; 5459 codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc()); 5460 if (type == Primitive::kPrimFloat) { 5461 CheckEntrypointTypes<kQuickFmodf, float, float, float>(); 5462 } else { 5463 CheckEntrypointTypes<kQuickFmod, double, double, double>(); 5464 } 5465 break; 5466 } 5467 5468 default: 5469 LOG(FATAL) << "Unexpected rem type " << type; 5470 UNREACHABLE(); 5471 } 5472 } 5473 5474 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { 5475 constructor_fence->SetLocations(nullptr); 5476 } 5477 5478 void InstructionCodeGeneratorARM64::VisitConstructorFence( 5479 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 5480 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 5481 } 5482 5483 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 5484 memory_barrier->SetLocations(nullptr); 5485 } 5486 5487 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 5488 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 5489 } 5490 5491 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { 5492 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 5493 Primitive::Type return_type = instruction->InputAt(0)->GetType(); 5494 locations->SetInAt(0, ARM64ReturnLocation(return_type)); 5495 } 5496 5497 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) { 5498 codegen_->GenerateFrameExit(); 5499 } 5500 5501 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { 5502 instruction->SetLocations(nullptr); 5503 } 5504 5505 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) { 5506 codegen_->GenerateFrameExit(); 5507 } 5508 5509 void LocationsBuilderARM64::VisitRor(HRor* ror) { 5510 HandleBinaryOp(ror); 5511 } 5512 5513 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) { 5514 HandleBinaryOp(ror); 5515 } 5516 5517 void LocationsBuilderARM64::VisitShl(HShl* shl) { 5518 HandleShift(shl); 5519 } 5520 5521 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) { 5522 HandleShift(shl); 5523 } 5524 5525 void LocationsBuilderARM64::VisitShr(HShr* shr) { 5526 HandleShift(shr); 5527 } 5528 5529 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) { 5530 HandleShift(shr); 5531 } 5532 5533 void LocationsBuilderARM64::VisitSub(HSub* instruction) { 5534 HandleBinaryOp(instruction); 5535 } 5536 5537 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) { 5538 HandleBinaryOp(instruction); 5539 } 5540 5541 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5542 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5543 } 5544 5545 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5546 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5547 } 5548 5549 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5550 HandleFieldSet(instruction); 5551 } 5552 5553 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5554 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 5555 } 5556 5557 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet( 5558 HUnresolvedInstanceFieldGet* instruction) { 5559 FieldAccessCallingConventionARM64 calling_convention; 5560 codegen_->CreateUnresolvedFieldLocationSummary( 5561 instruction, instruction->GetFieldType(), 
calling_convention); 5562 } 5563 5564 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet( 5565 HUnresolvedInstanceFieldGet* instruction) { 5566 FieldAccessCallingConventionARM64 calling_convention; 5567 codegen_->GenerateUnresolvedFieldAccess(instruction, 5568 instruction->GetFieldType(), 5569 instruction->GetFieldIndex(), 5570 instruction->GetDexPc(), 5571 calling_convention); 5572 } 5573 5574 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet( 5575 HUnresolvedInstanceFieldSet* instruction) { 5576 FieldAccessCallingConventionARM64 calling_convention; 5577 codegen_->CreateUnresolvedFieldLocationSummary( 5578 instruction, instruction->GetFieldType(), calling_convention); 5579 } 5580 5581 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet( 5582 HUnresolvedInstanceFieldSet* instruction) { 5583 FieldAccessCallingConventionARM64 calling_convention; 5584 codegen_->GenerateUnresolvedFieldAccess(instruction, 5585 instruction->GetFieldType(), 5586 instruction->GetFieldIndex(), 5587 instruction->GetDexPc(), 5588 calling_convention); 5589 } 5590 5591 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet( 5592 HUnresolvedStaticFieldGet* instruction) { 5593 FieldAccessCallingConventionARM64 calling_convention; 5594 codegen_->CreateUnresolvedFieldLocationSummary( 5595 instruction, instruction->GetFieldType(), calling_convention); 5596 } 5597 5598 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet( 5599 HUnresolvedStaticFieldGet* instruction) { 5600 FieldAccessCallingConventionARM64 calling_convention; 5601 codegen_->GenerateUnresolvedFieldAccess(instruction, 5602 instruction->GetFieldType(), 5603 instruction->GetFieldIndex(), 5604 instruction->GetDexPc(), 5605 calling_convention); 5606 } 5607 5608 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet( 5609 HUnresolvedStaticFieldSet* instruction) { 5610 FieldAccessCallingConventionARM64 calling_convention; 5611 codegen_->CreateUnresolvedFieldLocationSummary( 5612 instruction, instruction->GetFieldType(), calling_convention); 5613 } 5614 5615 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet( 5616 HUnresolvedStaticFieldSet* instruction) { 5617 FieldAccessCallingConventionARM64 calling_convention; 5618 codegen_->GenerateUnresolvedFieldAccess(instruction, 5619 instruction->GetFieldType(), 5620 instruction->GetFieldIndex(), 5621 instruction->GetDexPc(), 5622 calling_convention); 5623 } 5624 5625 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { 5626 LocationSummary* locations = 5627 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); 5628 // In suspend check slow path, usually there are no caller-save registers at all. 5629 // If SIMD instructions are present, however, we force spilling all live SIMD 5630 // registers in full width (since the runtime only saves/restores lower part). 5631 locations->SetCustomSlowPathCallerSaves( 5632 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); 5633 } 5634 5635 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) { 5636 HBasicBlock* block = instruction->GetBlock(); 5637 if (block->GetLoopInformation() != nullptr) { 5638 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 5639 // The back edge will generate the suspend check. 5640 return; 5641 } 5642 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 5643 // The goto will generate the suspend check. 
5644 return; 5645 } 5646 GenerateSuspendCheck(instruction, nullptr); 5647 } 5648 5649 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { 5650 LocationSummary* locations = 5651 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); 5652 InvokeRuntimeCallingConvention calling_convention; 5653 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5654 } 5655 5656 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { 5657 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 5658 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 5659 } 5660 5661 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { 5662 LocationSummary* locations = 5663 new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); 5664 Primitive::Type input_type = conversion->GetInputType(); 5665 Primitive::Type result_type = conversion->GetResultType(); 5666 DCHECK_NE(input_type, result_type); 5667 if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || 5668 (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { 5669 LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; 5670 } 5671 5672 if (Primitive::IsFloatingPointType(input_type)) { 5673 locations->SetInAt(0, Location::RequiresFpuRegister()); 5674 } else { 5675 locations->SetInAt(0, Location::RequiresRegister()); 5676 } 5677 5678 if (Primitive::IsFloatingPointType(result_type)) { 5679 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5680 } else { 5681 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5682 } 5683 } 5684 5685 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) { 5686 Primitive::Type result_type = conversion->GetResultType(); 5687 Primitive::Type input_type = conversion->GetInputType(); 5688 5689 DCHECK_NE(input_type, result_type); 5690 5691 if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { 5692 int result_size = Primitive::ComponentSize(result_type); 5693 int input_size = Primitive::ComponentSize(input_type); 5694 int min_size = std::min(result_size, input_size); 5695 Register output = OutputRegister(conversion); 5696 Register source = InputRegisterAt(conversion, 0); 5697 if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { 5698 // 'int' values are used directly as W registers, discarding the top 5699 // bits, so we don't need to sign-extend and can just perform a move. 5700 // We do not pass the `kDiscardForSameWReg` argument to force clearing the 5701 // top 32 bits of the target register. We theoretically could leave those 5702 // bits unchanged, but we would have to make sure that no code uses a 5703 // 32bit input value as a 64bit value assuming that the top 32 bits are 5704 // zero. 5705 __ Mov(output.W(), source.W()); 5706 } else if (result_type == Primitive::kPrimChar || 5707 (input_type == Primitive::kPrimChar && input_size < result_size)) { 5708 __ Ubfx(output, 5709 output.IsX() ? source.X() : source.W(), 5710 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte); 5711 } else { 5712 __ Sbfx(output, output.IsX() ? 
                  source.X() : source.W(), 0, min_size * kBitsPerByte);
    }
  } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
    __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
  } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
    CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
    __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
  } else if (Primitive::IsFloatingPointType(result_type) &&
             Primitive::IsFloatingPointType(input_type)) {
    __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
  } else {
    LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
               << " to " << result_type;
  }
}

void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}

void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}

void LocationsBuilderARM64::VisitXor(HXor* instruction) {
  HandleBinaryOp(instruction);
}

void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
  HandleBinaryOp(instruction);
}

void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  Register value_reg = InputRegisterAt(switch_instr, 0);
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  // Roughly set 16 as the maximum average number of instructions generated per HIR in a graph.
  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
  // ADR has a limited range (+/- 1 MB), so we set a threshold on the number of HIRs in the graph
  // to make sure we don't emit it if the target may run out of range.
  // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
  // ranges and emit the tables only as required.
  static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;

  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
      // The current instruction id is an upper bound of the number of HIRs in the graph.
      GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
    // Create a series of compare/jumps.
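    //
    // A rough sketch of the emitted sequence (not emitted verbatim; lower_bound == 10
    // and num_entries == 4 are assumed purely for illustration):
    //   SUBS w_tmp, w_value, #10
    //   B.EQ <case 10>
    //   SUBS w_tmp, w_tmp, #2
    //   B.LO <case 11>
    //   B.EQ <case 12>
    //   CMP  w_tmp, #1
    //   B.EQ <case 13>
    //   B    <default>   // Omitted when the default block is the fall-through block.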
    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
    Register temp = temps.AcquireW();
    __ Subs(temp, value_reg, Operand(lower_bound));

    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
    // Jump to successors[0] if value == lower_bound.
    __ B(eq, codegen_->GetLabelOf(successors[0]));
    int32_t last_index = 0;
    for (; num_entries - last_index > 2; last_index += 2) {
      __ Subs(temp, temp, Operand(2));
      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
    }
    if (num_entries - last_index == 2) {
      // The last missing case_value.
      __ Cmp(temp, Operand(1));
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
    }

    // And the default for any other value.
    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
      __ B(codegen_->GetLabelOf(default_block));
    }
  } else {
    JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);

    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());

    // The instructions below should use at most one blocked register. Since there are two
    // blocked registers, we are free to block one.
    Register temp_w = temps.AcquireW();
    Register index;
    // Remove the bias.
    if (lower_bound != 0) {
      index = temp_w;
      __ Sub(index, value_reg, Operand(lower_bound));
    } else {
      index = value_reg;
    }

    // Jump to the default block if the index is out of range.
    __ Cmp(index, Operand(num_entries));
    __ B(hs, codegen_->GetLabelOf(default_block));

    // In the current VIXL implementation, Adr does not require any blocked registers to encode
    // its immediate value, so we are free to use both VIXL blocked registers to reduce register
    // pressure.
    Register table_base = temps.AcquireX();
    // Load the jump offset from the table.
    __ Adr(table_base, jump_table->GetTableStartLabel());
    Register jump_offset = temp_w;
    __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));

    // Jump to the target block by branching to table_base (PC-relative) + offset.
    Register target_address = table_base;
    __ Add(target_address, table_base, Operand(jump_offset, SXTW));
    __ Br(target_address);
  }
}

void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  Primitive::Type type = Primitive::kPrimNot;
  Register out_reg = RegisterFrom(out, type);
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      out_reg,
                                                      offset,
                                                      maybe_temp,
                                                      /* needs_null_check */ false,
                                                      /* use_load_acquire */ false);
    } else {
      // Load with slow path based read barrier.
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
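      // (The saved value is what is later passed as the holder argument `obj`
      // to GenerateReadBarrierSlow() below.)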
      Register temp_reg = RegisterFrom(maybe_temp, type);
      __ Mov(temp_reg, out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ Ldr(out_reg, HeapOperand(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ Ldr(out_reg, HeapOperand(out_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  Primitive::Type type = Primitive::kPrimNot;
  Register out_reg = RegisterFrom(out, type);
  Register obj_reg = RegisterFrom(obj, type);
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      obj_reg,
                                                      offset,
                                                      maybe_temp,
                                                      /* needs_null_check */ false,
                                                      /* use_load_acquire */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    Register obj,
    uint32_t offset,
    vixl::aarch64::Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  DCHECK(fixup_label == nullptr || offset == 0u);
  Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used.
      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
          !Runtime::Current()->UseJitCompilation()) {
        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
        // the Marking Register) to decide whether we need to enter
        // the slow path to mark the GC root.
        //
        // We use link-time generated thunks for the slow path. That thunk
        // checks the reference and jumps to the entrypoint if needed.
        //
        // lr = &return_address;
        // GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
        // if (mr) {  // Thread::Current()->GetIsGcMarking()
        //   goto gc_root_thunk<root_reg>(lr)
        // }
        // return_address:

        UseScratchRegisterScope temps(GetVIXLAssembler());
        DCHECK(temps.IsAvailable(ip0));
        DCHECK(temps.IsAvailable(ip1));
        temps.Exclude(ip0, ip1);
        uint32_t custom_data =
            linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
        vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);

        EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
        vixl::aarch64::Label return_address;
        __ adr(lr, &return_address);
        if (fixup_label != nullptr) {
          __ Bind(fixup_label);
        }
        static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
                      "GC root LDR must be 2 instructions (8B) before the return address label.");
        __ ldr(root_reg, MemOperand(obj.X(), offset));
        __ Bind(cbnz_label);
        __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
        __ Bind(&return_address);
      } else {
        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
        // the Marking Register) to decide whether we need to enter
        // the slow path to mark the GC root.
        //
        // GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
        // if (mr) {  // Thread::Current()->GetIsGcMarking()
        //   // Slow path.
        //   entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
        //   root = entrypoint(root);  // root = ReadBarrier::Mark(root);  // Entry point call.
        // }

        // Slow path marking the GC root `root`. The entrypoint will
        // be loaded by the slow path code.
        SlowPathCodeARM64* slow_path =
            new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
        codegen_->AddSlowPath(slow_path);

        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
        if (fixup_label == nullptr) {
          __ Ldr(root_reg, MemOperand(obj, offset));
        } else {
          codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
        }
        static_assert(
            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
            "have different sizes.");
        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
                      "have different sizes.");

        __ Cbnz(mr, slow_path->GetEntryLabel());
        __ Bind(slow_path->GetExitLabel());
      }
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = obj + offset
      if (fixup_label == nullptr) {
        __ Add(root_reg.X(), obj.X(), offset);
      } else {
        codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
      }
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
    if (fixup_label == nullptr) {
      __ Ldr(root_reg, MemOperand(obj, offset));
    } else {
      codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
    }
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
}

void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t offset,
                                                               Location maybe_temp,
                                                               bool needs_null_check,
                                                               bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
      !use_load_acquire &&
      !Runtime::Current()->UseJitCompilation()) {
    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
    // Marking Register) to decide whether we need to enter the slow
    // path to mark the reference. Then, in the slow path, check the
    // gray bit in the lock word of the reference's holder (`obj`) to
    // decide whether to mark `ref` or not.
    //
    // We use link-time generated thunks for the slow path. That thunk checks
    // the holder and jumps to the entrypoint if needed. If the holder is not
    // gray, it creates a fake dependency and returns to the LDR instruction.
    //
    // lr = &gray_return_address;
    // if (mr) {  // Thread::Current()->GetIsGcMarking()
    //   goto field_thunk<holder_reg, base_reg>(lr)
    // }
    // not_gray_return_address:
    //   // Original reference load. If the offset is too large to fit
    //   // into LDR, we use an adjusted base register here.
    //   HeapReference<mirror::Object> reference = *(obj+offset);
    // gray_return_address:

    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
    Register base = obj;
    if (offset >= kReferenceLoadMinFarOffset) {
      DCHECK(maybe_temp.IsRegister());
      base = WRegisterFrom(maybe_temp);
      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
      __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
      offset &= (kReferenceLoadMinFarOffset - 1u);
    }
    UseScratchRegisterScope temps(GetVIXLAssembler());
    DCHECK(temps.IsAvailable(ip0));
    DCHECK(temps.IsAvailable(ip1));
    temps.Exclude(ip0, ip1);
    uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
        base.GetCode(),
        obj.GetCode());
    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);

    EmissionCheckScope guard(GetVIXLAssembler(),
                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
    vixl::aarch64::Label return_address;
    __ adr(lr, &return_address);
    __ Bind(cbnz_label);
    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ?
                                                                -8 : -4),
                  "Field LDR must be 1 instruction (4B) before the return address label; "
                  "2 instructions (8B) for heap poisoning.");
    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
    __ ldr(ref_reg, MemOperand(base.X(), offset));
    if (needs_null_check) {
      MaybeRecordImplicitNullCheck(instruction);
    }
    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
    __ Bind(&return_address);
    return;
  }

  // /* HeapReference<Object> */ ref = *(obj + offset)
  Register temp = WRegisterFrom(maybe_temp);
  Location no_index = Location::NoLocation();
  size_t no_scale_factor = 0u;
  GenerateReferenceLoadWithBakerReadBarrier(instruction,
                                            ref,
                                            obj,
                                            offset,
                                            no_index,
                                            no_scale_factor,
                                            temp,
                                            needs_null_check,
                                            use_load_acquire);
}

void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t data_offset,
                                                               Location index,
                                                               Register temp,
                                                               bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);

  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
      !Runtime::Current()->UseJitCompilation()) {
    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
    // Marking Register) to decide whether we need to enter the slow
    // path to mark the reference. Then, in the slow path, check the
    // gray bit in the lock word of the reference's holder (`obj`) to
    // decide whether to mark `ref` or not.
    //
    // We use link-time generated thunks for the slow path. That thunk checks
    // the holder and jumps to the entrypoint if needed. If the holder is not
    // gray, it creates a fake dependency and returns to the LDR instruction.
    //
    // lr = &gray_return_address;
    // if (mr) {  // Thread::Current()->GetIsGcMarking()
    //   goto array_thunk<base_reg>(lr)
    // }
    // not_gray_return_address:
    //   // Original reference load. If the offset is too large to fit
    //   // into LDR, we use an adjusted base register here.
    //   HeapReference<mirror::Object> reference = data[index];
    // gray_return_address:

    DCHECK(index.IsValid());
    Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);

    UseScratchRegisterScope temps(GetVIXLAssembler());
    DCHECK(temps.IsAvailable(ip0));
    DCHECK(temps.IsAvailable(ip1));
    temps.Exclude(ip0, ip1);
    uint32_t custom_data =
        linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);

    __ Add(temp.X(), obj.X(), Operand(data_offset));
    EmissionCheckScope guard(GetVIXLAssembler(),
                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
    vixl::aarch64::Label return_address;
    __ adr(lr, &return_address);
    __ Bind(cbnz_label);
    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ?
                                                                -8 : -4),
                  "Array LDR must be 1 instruction (4B) before the return address label; "
                  "2 instructions (8B) for heap poisoning.");
    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
    __ Bind(&return_address);
    return;
  }

  // Array cells are never volatile variables, therefore array loads
  // never use Load-Acquire instructions on ARM64.
  const bool use_load_acquire = false;

  // /* HeapReference<Object> */ ref =
  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
  GenerateReferenceLoadWithBakerReadBarrier(instruction,
                                            ref,
                                            obj,
                                            data_offset,
                                            index,
                                            scale_factor,
                                            temp,
                                            needs_null_check,
                                            use_load_acquire);
}

void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                   Location ref,
                                                                   Register obj,
                                                                   uint32_t offset,
                                                                   Location index,
                                                                   size_t scale_factor,
                                                                   Register temp,
                                                                   bool needs_null_check,
                                                                   bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);
  // If we are emitting an array load, we should not be using a
  // Load Acquire instruction. In other words:
  // `instruction->IsArrayGet()` => `!use_load_acquire`.
  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);

  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
  // Marking Register) to decide whether we need to enter the slow
  // path to mark the reference. Then, in the slow path, check the
  // gray bit in the lock word of the reference's holder (`obj`) to
  // decide whether to mark `ref` or not.
  //
  // if (mr) {  // Thread::Current()->GetIsGcMarking()
  //   // Slow path.
  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     entrypoint = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
  //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
  //   }
  // } else {
  //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
  // }

  // Slow path marking the object `ref` when the GC is marking. The
  // entrypoint will be loaded by the slow path code.
  SlowPathCodeARM64* slow_path =
      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
          instruction,
          ref,
          obj,
          offset,
          index,
          scale_factor,
          needs_null_check,
          use_load_acquire,
          temp);
  AddSlowPath(slow_path);

  __ Cbnz(mr, slow_path->GetEntryLabel());
  // Fast path: the GC is not marking: just load the reference.
  GenerateRawReferenceLoad(
      instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
                                                                  Location ref,
                                                                  Register obj,
                                                                  Location field_offset,
                                                                  Register temp,
                                                                  bool needs_null_check,
                                                                  bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);
  // If we are emitting an array load, we should not be using a
  // Load Acquire instruction. In other words:
  // `instruction->IsArrayGet()` => `!use_load_acquire`.
  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);

  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
  // Marking Register) to decide whether we need to enter the slow
  // path to update the reference field within `obj`. Then, in the
  // slow path, check the gray bit in the lock word of the reference's
  // holder (`obj`) to decide whether to mark `ref` and update the
  // field or not.
  //
  // if (mr) {  // Thread::Current()->GetIsGcMarking()
  //   // Slow path.
  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     old_ref = ref;
  //     entrypoint = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
  //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
  //     compareAndSwapObject(obj, field_offset, old_ref, ref);
  //   }
  // }

  // Slow path updating the object reference at address `obj + field_offset`
  // when the GC is marking. The entrypoint will be loaded by the slow path code.
  SlowPathCodeARM64* slow_path =
      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
          instruction,
          ref,
          obj,
          /* offset */ 0u,
          /* index */ field_offset,
          /* scale_factor */ 0u /* "times 1" */,
          needs_null_check,
          use_load_acquire,
          temp);
  AddSlowPath(slow_path);

  __ Cbnz(mr, slow_path->GetEntryLabel());
  // Fast path: the GC is not marking: nothing to do (the field is
  // up-to-date, and we don't need to load the reference).
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
                                                  Location ref,
                                                  Register obj,
                                                  uint32_t offset,
                                                  Location index,
                                                  size_t scale_factor,
                                                  bool needs_null_check,
                                                  bool use_load_acquire) {
  DCHECK(obj.IsW());
  Primitive::Type type = Primitive::kPrimNot;
  Register ref_reg = RegisterFrom(ref, type);

  // If needed, vixl::EmissionCheckScope guards are used to ensure
  // that no pools are emitted between the load (macro) instruction
  // and MaybeRecordImplicitNullCheck.

  if (index.IsValid()) {
    // Load types involving an "index": ArrayGet,
    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
    // intrinsics.
    if (use_load_acquire) {
      // UnsafeGetObjectVolatile intrinsic case.
      // Register `index` is not an index in an object array, but an
      // offset to an object reference field within object `obj`.
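      // For example, a call like Unsafe.getObjectVolatile(someObject, rawOffset)
      // (names purely illustrative) is intrinsified down this path, with `index`
      // holding the raw byte offset.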
      DCHECK(instruction->IsInvoke()) << instruction->DebugName();
      DCHECK(instruction->GetLocations()->Intrinsified());
      DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
          << instruction->AsInvoke()->GetIntrinsic();
      DCHECK_EQ(offset, 0u);
      DCHECK_EQ(scale_factor, 0u);
      DCHECK_EQ(needs_null_check, false);
      // /* HeapReference<mirror::Object> */ ref = *(obj + index)
      MemOperand field = HeapOperand(obj, XRegisterFrom(index));
      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
    } else {
      // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
      // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
      if (index.IsConstant()) {
        uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
        Load(type, ref_reg, HeapOperand(obj, computed_offset));
        if (needs_null_check) {
          MaybeRecordImplicitNullCheck(instruction);
        }
      } else {
        UseScratchRegisterScope temps(GetVIXLAssembler());
        Register temp = temps.AcquireW();
        __ Add(temp, obj, offset);
        {
          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
          Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
          if (needs_null_check) {
            MaybeRecordImplicitNullCheck(instruction);
          }
        }
      }
    }
  } else {
    // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
    MemOperand field = HeapOperand(obj, offset);
    if (use_load_acquire) {
      // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
      LoadAcquire(instruction, ref_reg, field, needs_null_check);
    } else {
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      Load(type, ref_reg, field);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
    }
  }

  // Object* ref = ref_addr->AsMirrorPtr()
  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
}

void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                 Location out,
                                                 Location ref,
                                                 Location obj,
                                                 uint32_t offset,
                                                 Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
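  //
  // In rough terms, the emitted code is just an unconditional branch to the
  // slow path, which performs the read barrier via the runtime and branches
  // back to the exit label bound immediately after (see the B/Bind pair below).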
  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location ref,
                                                      Location obj,
                                                      uint32_t offset,
                                                      Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
  }
}

void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                        Location out,
                                                        Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCodeARM64* slow_path =
      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
        instruction->GetIndex(), kArm64PointerSize).SizeValue();
    __ Ldr(XRegisterFrom(locations->Out()),
           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
  } else {
    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
        instruction->GetIndex(), kArm64PointerSize));
    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
    __ Ldr(XRegisterFrom(locations->Out()),
           MemOperand(XRegisterFrom(locations->Out()), method_offset));
  }
}

static void PatchJitRootUse(uint8_t* code,
                            const uint8_t* roots_data,
                            vixl::aarch64::Literal<uint32_t>* literal,
                            uint64_t index_in_table) {
  uint32_t literal_offset = literal->GetOffset();
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  uint8_t* data = code + literal_offset;
  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const auto& entry : jit_string_patches_) {
    const StringReference& string_reference = entry.first;
    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
    const auto it = jit_string_roots_.find(string_reference);
    DCHECK(it != jit_string_roots_.end());
    uint64_t index_in_table = it->second;
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
  for (const auto& entry : jit_class_patches_) {
    const TypeReference& type_reference = entry.first;
    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
    const auto it = jit_class_roots_.find(type_reference);
    DCHECK(it != jit_class_roots_.end());
    uint64_t index_in_table = it->second;
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
}

#undef __
#undef QUICK_ENTRY_POINT

}  // namespace arm64
}  // namespace art