/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/arm64/relative_patcher_arm64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using vixl::EmissionCheckScope;

#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif

namespace art {

template<class MirrorType>
class GcRoot;

namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterAt;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while a
// jump table version generates 7 instructions and num_entries literals. The compare/jump
// sequence generates less code/data when num_entries is small.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
// offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;

// Flags controlling the use of link-time generated thunks for Baker read barriers.
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;

// Some instructions have special requirements for a temporary, for example
// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
// a temp that's not R0 (to avoid an extra move) and Baker read barrier field
// loads with large offsets need a fixed register to limit the number of link-time
// thunks we generate. For these and similar cases, we want to reserve a specific
// register that's neither callee-save nor an argument register. We choose x15.
inline Location FixedTempLocation() {
  return Location::RegisterLocation(x15.GetCode());
}

inline Condition ARM64Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
  // The ARM64 condition codes can express all the necessary branches, see the
  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

Location ARM64ReturnLocation(DataType::Type return_type) {
  // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
  // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
  // but we use the exact registers for clarity.
  if (return_type == DataType::Type::kFloat32) {
    return LocationFrom(s0);
  } else if (return_type == DataType::Type::kFloat64) {
    return LocationFrom(d0);
  } else if (return_type == DataType::Type::kInt64) {
    return LocationFrom(x0);
  } else if (return_type == DataType::Type::kVoid) {
    return Location::NoLocation();
  } else {
    return LocationFrom(w0);
  }
}

Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
  return ARM64ReturnLocation(return_type);
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()

// Calculate memory accessing operand for save/restore live registers.
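// A rough sketch of the layout produced by the helper below (offsets relative to the stack
// pointer, assuming they fit the STP/LDP pair immediate):
//
//   [sp + spill_offset]                   : core (X) register spills
//   [sp + spill_offset + core_spill_size] : FP (D or Q) register spills
//
// When the farthest pair offset does not fit, the helper rebases onto a scratch register
// pointing at the FP spill area and addresses the core spills with a negative offset.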
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset,
                                           bool is_save) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
                                         codegen->GetNumberOfCoreRegisters(),
                                         fp_spills,
                                         codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);

  MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate register
    // to compute the base address (the floating point registers' spill base address).
    Register new_base = temps.AcquireSameSizeAs(base);
    __ Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
    __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
    __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}

void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kXRegSizeInBytes;
  }

  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kDRegSizeInBytes;
  }

  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
}

void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
}

class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kInt32,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
};

class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
};

class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  LoadClassSlowPathARM64(HLoadClass* cls,
                         HInstruction* at,
                         uint32_t dex_pc,
                         bool do_clinit)
      : SlowPathCodeARM64(at),
        cls_(cls),
        dex_pc_(dex_pc),
        do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    dex::TypeIndex type_index = cls_->GetTypeIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
    QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                : kQuickInitializeType;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      DataType::Type type = instruction_->GetType();
      arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
    }
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The dex PC of `at_`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
};

class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit LoadStringSlowPathARM64(HLoadString* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
    arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    DataType::Type type = instruction_->GetType();
    arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
};

class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
};

class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARM64(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
    arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm64_codegen->GetLabelOf(successor_));
    }
  }

  vixl::aarch64::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl::aarch64::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
};

class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();

    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    uint32_t dex_pc = instruction_->GetDexPc();

    __ Bind(GetEntryLabel());

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      DataType::Type ret_type = instruction_->GetType();
      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
      arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};

class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};

class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
};

void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
  uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  // We are about to use the assembler to place literals directly. Make sure we have enough
  // underlying code buffer and that we have generated a jump table of the right size.
  EmissionCheckScope scope(codegen->GetVIXLAssembler(),
                           num_entries * sizeof(int32_t),
                           CodeBufferCheckScope::kExactSize);

  __ Bind(&table_start_);
  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
  for (uint32_t i = 0; i < num_entries; i++) {
    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
    DCHECK(target_label->IsBound());
    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
    Literal<int32_t> literal(jump_offset);
    __ place(&literal);
  }
}

// Abstract base class for read barrier slow paths marking a reference
// `ref`.
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
 protected:
  ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
      : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }

  // Generate assembly code calling the read barrier marking runtime
  // entry point (ReadBarrierMarkRegX).
  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    DCHECK_NE(ref_.reg(), LR);
    DCHECK_NE(ref_.reg(), WSP);
    DCHECK_NE(ref_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary, it cannot be the entry point's input/output.
    DCHECK_NE(ref_.reg(), IP0);
    DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in W0):
    //
    //   W0 <- ref
    //   W0 <- ReadBarrierMark(W0)
    //   ref <- W0
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    if (entrypoint_.IsValid()) {
      arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
      __ Blr(XRegisterFrom(entrypoint_));
    } else {
      // Entrypoint is not already loaded, load from the thread.
      int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
      // This runtime call does not require a stack map.
      arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    }
  }

  // The location (register) of the marked object reference.
  const Location ref_;

  // The location of the entrypoint if it is already loaded.
  const Location entrypoint_;

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
                               Location ref,
                               Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);
    __ B(GetExitLabel());
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). The field `obj.field` in the object `obj` holding
// this reference does not get updated by this slow path after marking
// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
// below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 uint32_t offset,
                                                 Location index,
                                                 size_t scale_factor,
                                                 bool needs_null_check,
                                                 bool use_load_acquire,
                                                 Register temp,
                                                 Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        use_load_acquire_(use_load_acquire),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(obj_.IsW());
    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP0 nor IP1, as we may use them to emit the reference
    // load (in the call to GenerateRawReferenceLoad below), and we
    // need the lock word to still be in `temp_` after the reference
    // load.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    DCHECK_NE(LocationFrom(temp_).reg(), IP1);

    __ Bind(GetEntryLabel());

    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
    // inserted after the original load. However, in fast path based
    // Baker's read barriers, we need to perform the load of
    // mirror::Object::monitor_ *before* the original reference load.
    // This load-load ordering is required by the read barrier.
    // The slow path (for Baker's algorithm) should look like:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //   }
    //
    // Note: the original implementation in ReadBarrier::Barrier is
    // slightly more complex as it performs additional checks that we do
    // not do here for performance reasons.

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including rb_state,
    // to prevent load-load reordering, and without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp`.
    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->GenerateRawReferenceLoad(instruction_,
                                            ref_,
                                            obj_,
                                            offset_,
                                            index_,
                                            scale_factor_,
                                            /* needs_null_check */ false,
                                            use_load_acquire_);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);

    __ B(GetExitLabel());
  }

 private:
  // The register containing the object holding the marked object reference field.
  Register obj_;
  // The offset, index and scale factor to access the reference in `obj_`.
  uint32_t offset_;
  Location index_;
  size_t scale_factor_;
  // Is a null check required?
  bool needs_null_check_;
  // Should this reference load use Load-Acquire semantics?
  bool use_load_acquire_;
  // A temporary register used to hold the lock word of `obj_`.
  Register temp_;

  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). If needed, this slow path also atomically updates
// the field `obj.field` in the object `obj` holding this reference
// after marking (contrary to
// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
    : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
      HInstruction* instruction,
      Location ref,
      Register obj,
      uint32_t offset,
      Location index,
      size_t scale_factor,
      bool needs_null_check,
      bool use_load_acquire,
      Register temp,
      Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        use_load_acquire_(use_load_acquire),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = WRegisterFrom(ref_);
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(obj_.IsW());
    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());

    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
    DCHECK_EQ(offset_, 0u);
    DCHECK_EQ(scale_factor_, 0u);
    DCHECK_EQ(use_load_acquire_, false);
    // The location of the offset of the marked reference field within `obj_`.
    Location field_offset = index_;
    DCHECK(field_offset.IsRegister()) << field_offset;

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP0 nor IP1, as we may use them to emit the reference
    // load (in the call to GenerateRawReferenceLoad below), and we
    // need the lock word to still be in `temp_` after the reference
    // load.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    DCHECK_NE(LocationFrom(temp_).reg(), IP1);

    __ Bind(GetEntryLabel());

    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     old_ref = ref;
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
    //   }

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including rb_state,
    // to prevent load-load reordering, and without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp`.
    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->GenerateRawReferenceLoad(instruction_,
                                            ref_,
                                            obj_,
                                            offset_,
                                            index_,
                                            scale_factor_,
                                            /* needs_null_check */ false,
                                            use_load_acquire_);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());

    // Save the old value of the reference before marking it.
    // Note that we cannot use IP to save the old reference, as IP is
    // used internally by the ReadBarrierMarkRegX entry point, and we
    // need the old reference after the call to that entry point.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    __ Mov(temp_.W(), ref_reg);

    GenerateReadBarrierMarkRuntimeCall(codegen);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*(obj_ + field_offset)`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LDXR/CMP/BNE sequence of instructions in the compare-and-set
    // (CAS) operation below would abort the CAS, leaving the field
    // as-is.
    __ Cmp(temp_.W(), ref_reg);
    __ B(eq, GetExitLabel());

    // Update the holder's field atomically. This may fail if the
    // mutator updates the field before us, but that is OK. This is
    // achieved using a strong compare-and-set (CAS) operation with
    // relaxed memory synchronization ordering, where the expected
    // value is the old reference and the desired value is the new
    // reference.

    MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
    UseScratchRegisterScope temps(masm);

    // Convenience aliases.
    Register base = obj_.W();
    Register offset = XRegisterFrom(field_offset);
    Register expected = temp_.W();
    Register value = ref_reg;
    Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
    Register tmp_value = temps.AcquireW();  // Value in memory.
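
    // Compute the absolute address of the field: tmp_ptr = obj + field_offset.
    // The exclusive load/store (LDXR/STXR) loop below operates on this address.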
    __ Add(tmp_ptr, base.X(), Operand(offset));

    if (kPoisonHeapReferences) {
      arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
      if (value.Is(expected)) {
        // Do not poison `value`, as it is the same register as
        // `expected`, which has just been poisoned.
      } else {
        arm64_codegen->GetAssembler()->PoisonHeapReference(value);
      }
    }

    // do {
    //   tmp_value = [tmp_ptr] - expected;
    // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));

    vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
    __ Bind(&loop_head);
    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
    __ Cmp(tmp_value, expected);
    __ B(&comparison_failed, ne);
    __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
    __ Cbnz(tmp_value, &loop_head);
    __ B(&exit_loop);
    __ Bind(&comparison_failed);
    __ Clrex();
    __ Bind(&exit_loop);

    if (kPoisonHeapReferences) {
      arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
      if (value.Is(expected)) {
        // Do not unpoison `value`, as it is the same register as
        // `expected`, which has just been unpoisoned.
      } else {
        arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
      }
    }

    __ B(GetExitLabel());
  }

 private:
  // The register containing the object holding the marked object reference field.
  const Register obj_;
  // The offset, index and scale factor to access the reference in `obj_`.
  uint32_t offset_;
  Location index_;
  size_t scale_factor_;
  // Is a null check required?
  bool needs_null_check_;
  // Should this reference load use Load-Acquire semantics?
  bool use_load_acquire_;
  // A temporary register used to hold the lock word of `obj_`; and
  // also to hold the original reference value, when the reference is
  // marked.
  const Register temp_;

  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
                                           Location out,
                                           Location ref,
                                           Location obj,
                                           uint32_t offset,
                                           Location index)
      : SlowPathCodeARM64(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ Ldr(out, HeapOperand(out, class_offset);
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DataType::Type type = DataType::Type::kReference;
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
          // We are about to change the value of `index_reg` (see the
          // calls to vixl::MacroAssembler::Lsl and
          // vixl::MacroAssembler::Mov below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg.W(), index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, Operand(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0u);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          type,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          type,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
    }
    arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
    size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return Register(VIXLRegCodeFromART(i), kXRegSize);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
      : SlowPathCodeARM64(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DataType::Type type = DataType::Type::kReference;
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    // The argument of the ReadBarrierForRootSlow is not a managed
    // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
    // thus we need a 64-bit move here, and we cannot use
    //
    //   arm64_codegen->MoveLocation(
    //       LocationFrom(calling_convention.GetRegisterAt(0)),
    //       root_,
    //       type);
    //
    // which would emit a 32-bit move, as `type` is a (32-bit wide)
    // reference type (`DataType::Type::kReference`).
    __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
    arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
};

#undef __

Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
  Location next_location;
  if (type == DataType::Type::kVoid) {
    LOG(FATAL) << "Unreachable type " << type;
  }

  if (DataType::IsFloatingPointType(type) &&
      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
  } else if (!DataType::IsFloatingPointType(type) &&
             (gp_index_ < calling_convention.GetNumberOfRegisters())) {
    next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
  } else {
    size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
    next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
                                                : Location::StackSlot(stack_offset);
  }

  // Space on the stack is reserved for all arguments.
  stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
  return next_location;
}

Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
  return LocationFrom(kArtMethodRegister);
}

CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
                                       const Arm64InstructionSetFeatures& isa_features,
                                       const CompilerOptions& compiler_options,
                                       OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfAllocatableRegisters,
                    kNumberOfAllocatableFPRegisters,
                    kNumberOfAllocatableRegisterPairs,
                    callee_saved_core_registers.GetList(),
                    callee_saved_fp_registers.GetList(),
                    compiler_options,
                    stats),
      block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      isa_features_(isa_features),
      uint32_literals_(std::less<uint32_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      uint64_literals_(std::less<uint64_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1405 jit_string_patches_(StringReferenceValueComparator(), 1406 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1407 jit_class_patches_(TypeReferenceValueComparator(), 1408 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { 1409 // Save the link register (containing the return address) to mimic Quick. 1410 AddAllocatedRegister(LocationFrom(lr)); 1411 } 1412 1413 #define __ GetVIXLAssembler()-> 1414 1415 void CodeGeneratorARM64::EmitJumpTables() { 1416 for (auto&& jump_table : jump_tables_) { 1417 jump_table->EmitTable(this); 1418 } 1419 } 1420 1421 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { 1422 EmitJumpTables(); 1423 // Ensure we emit the literal pool. 1424 __ FinalizeCode(); 1425 1426 CodeGenerator::Finalize(allocator); 1427 } 1428 1429 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { 1430 // Note: There are 6 kinds of moves: 1431 // 1. constant -> GPR/FPR (non-cycle) 1432 // 2. constant -> stack (non-cycle) 1433 // 3. GPR/FPR -> GPR/FPR 1434 // 4. GPR/FPR -> stack 1435 // 5. stack -> GPR/FPR 1436 // 6. stack -> stack (non-cycle) 1437 // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5 1438 // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting 1439 // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the 1440 // dependency. 1441 vixl_temps_.Open(GetVIXLAssembler()); 1442 } 1443 1444 void ParallelMoveResolverARM64::FinishEmitNativeCode() { 1445 vixl_temps_.Close(); 1446 } 1447 1448 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) { 1449 DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister 1450 || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot 1451 || kind == Location::kSIMDStackSlot); 1452 kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot) 1453 ? Location::kFpuRegister 1454 : Location::kRegister; 1455 Location scratch = GetScratchLocation(kind); 1456 if (!scratch.Equals(Location::NoLocation())) { 1457 return scratch; 1458 } 1459 // Allocate from VIXL temp registers. 1460 if (kind == Location::kRegister) { 1461 scratch = LocationFrom(vixl_temps_.AcquireX()); 1462 } else { 1463 DCHECK_EQ(kind, Location::kFpuRegister); 1464 scratch = LocationFrom(codegen_->GetGraph()->HasSIMD() 1465 ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize) 1466 : vixl_temps_.AcquireD()); 1467 } 1468 AddScratchLocation(scratch); 1469 return scratch; 1470 } 1471 1472 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { 1473 if (loc.IsRegister()) { 1474 vixl_temps_.Release(XRegisterFrom(loc)); 1475 } else { 1476 DCHECK(loc.IsFpuRegister()); 1477 vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? 
QRegisterFrom(loc) : DRegisterFrom(loc)); 1478 } 1479 RemoveScratchLocation(loc); 1480 } 1481 1482 void ParallelMoveResolverARM64::EmitMove(size_t index) { 1483 MoveOperands* move = moves_[index]; 1484 codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid); 1485 } 1486 1487 void CodeGeneratorARM64::GenerateFrameEntry() { 1488 MacroAssembler* masm = GetVIXLAssembler(); 1489 __ Bind(&frame_entry_label_); 1490 1491 if (GetCompilerOptions().CountHotnessInCompiledCode()) { 1492 UseScratchRegisterScope temps(masm); 1493 Register temp = temps.AcquireX(); 1494 __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); 1495 __ Add(temp, temp, 1); 1496 __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); 1497 } 1498 1499 bool do_overflow_check = 1500 FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod(); 1501 if (do_overflow_check) { 1502 UseScratchRegisterScope temps(masm); 1503 Register temp = temps.AcquireX(); 1504 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); 1505 __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64))); 1506 { 1507 // Ensure that between load and RecordPcInfo there are no pools emitted. 1508 ExactAssemblyScope eas(GetVIXLAssembler(), 1509 kInstructionSize, 1510 CodeBufferCheckScope::kExactSize); 1511 __ ldr(wzr, MemOperand(temp, 0)); 1512 RecordPcInfo(nullptr, 0); 1513 } 1514 } 1515 1516 if (!HasEmptyFrame()) { 1517 int frame_size = GetFrameSize(); 1518 // Stack layout: 1519 // sp[frame_size - 8] : lr. 1520 // ... : other preserved core registers. 1521 // ... : other preserved fp registers. 1522 // ... : reserved frame space. 1523 // sp[0] : current method. 1524 1525 // Save the current method if we need it. Note that we do not 1526 // do this in HCurrentMethod, as the instruction might have been removed 1527 // in the SSA graph. 1528 if (RequiresCurrentMethod()) { 1529 __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); 1530 } else { 1531 __ Claim(frame_size); 1532 } 1533 GetAssembler()->cfi().AdjustCFAOffset(frame_size); 1534 GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(), 1535 frame_size - GetCoreSpillSize()); 1536 GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(), 1537 frame_size - FrameEntrySpillSize()); 1538 1539 if (GetGraph()->HasShouldDeoptimizeFlag()) { 1540 // Initialize should_deoptimize flag to 0. 
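// Use a 32-bit (W) view of the zero register so that the store below writes a 32-bit zero to the flag slot.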
1541 Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize); 1542 __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); 1543 } 1544 } 1545 1546 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 1547 } 1548 1549 void CodeGeneratorARM64::GenerateFrameExit() { 1550 GetAssembler()->cfi().RememberState(); 1551 if (!HasEmptyFrame()) { 1552 int frame_size = GetFrameSize(); 1553 GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(), 1554 frame_size - FrameEntrySpillSize()); 1555 GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(), 1556 frame_size - GetCoreSpillSize()); 1557 __ Drop(frame_size); 1558 GetAssembler()->cfi().AdjustCFAOffset(-frame_size); 1559 } 1560 __ Ret(); 1561 GetAssembler()->cfi().RestoreState(); 1562 GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); 1563 } 1564 1565 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const { 1566 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0)); 1567 return CPURegList(CPURegister::kRegister, kXRegSize, 1568 core_spill_mask_); 1569 } 1570 1571 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const { 1572 DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_, 1573 GetNumberOfFloatingPointRegisters())); 1574 return CPURegList(CPURegister::kFPRegister, kDRegSize, 1575 fpu_spill_mask_); 1576 } 1577 1578 void CodeGeneratorARM64::Bind(HBasicBlock* block) { 1579 __ Bind(GetLabelOf(block)); 1580 } 1581 1582 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) { 1583 DCHECK(location.IsRegister()); 1584 __ Mov(RegisterFrom(location, DataType::Type::kInt32), value); 1585 } 1586 1587 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) { 1588 if (location.IsRegister()) { 1589 locations->AddTemp(location); 1590 } else { 1591 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; 1592 } 1593 } 1594 1595 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) { 1596 UseScratchRegisterScope temps(GetVIXLAssembler()); 1597 Register card = temps.AcquireX(); 1598 Register temp = temps.AcquireW(); // Index within the CardTable - 32bit. 1599 vixl::aarch64::Label done; 1600 if (value_can_be_null) { 1601 __ Cbz(value, &done); 1602 } 1603 __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value())); 1604 __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); 1605 __ Strb(card, MemOperand(card, temp.X())); 1606 if (value_can_be_null) { 1607 __ Bind(&done); 1608 } 1609 } 1610 1611 void CodeGeneratorARM64::SetupBlockedRegisters() const { 1612 // Blocked core registers: 1613 // lr : Runtime reserved. 1614 // tr : Runtime reserved. 1615 // mr : Runtime reserved. 1616 // ip1 : VIXL core temp. 1617 // ip0 : VIXL core temp. 1618 // 1619 // Blocked fp registers: 1620 // d31 : VIXL fp temp. 1621 CPURegList reserved_core_registers = vixl_reserved_core_registers; 1622 reserved_core_registers.Combine(runtime_reserved_core_registers); 1623 while (!reserved_core_registers.IsEmpty()) { 1624 blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true; 1625 } 1626 1627 CPURegList reserved_fp_registers = vixl_reserved_fp_registers; 1628 while (!reserved_fp_registers.IsEmpty()) { 1629 blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true; 1630 } 1631 1632 if (GetGraph()->IsDebuggable()) { 1633 // Stubs do not save callee-save floating point registers. 
If the graph 1634 // is debuggable, we need to deal with these registers differently. For 1635 // now, just block them. 1636 CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers; 1637 while (!reserved_fp_registers_debuggable.IsEmpty()) { 1638 blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true; 1639 } 1640 } 1641 } 1642 1643 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { 1644 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); 1645 __ Str(reg, MemOperand(sp, stack_index)); 1646 return kArm64WordSize; 1647 } 1648 1649 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { 1650 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); 1651 __ Ldr(reg, MemOperand(sp, stack_index)); 1652 return kArm64WordSize; 1653 } 1654 1655 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 1656 FPRegister reg = FPRegister(reg_id, kDRegSize); 1657 __ Str(reg, MemOperand(sp, stack_index)); 1658 return kArm64WordSize; 1659 } 1660 1661 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 1662 FPRegister reg = FPRegister(reg_id, kDRegSize); 1663 __ Ldr(reg, MemOperand(sp, stack_index)); 1664 return kArm64WordSize; 1665 } 1666 1667 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const { 1668 stream << XRegister(reg); 1669 } 1670 1671 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { 1672 stream << DRegister(reg); 1673 } 1674 1675 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { 1676 if (constant->IsIntConstant()) { 1677 __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); 1678 } else if (constant->IsLongConstant()) { 1679 __ Mov(Register(destination), constant->AsLongConstant()->GetValue()); 1680 } else if (constant->IsNullConstant()) { 1681 __ Mov(Register(destination), 0); 1682 } else if (constant->IsFloatConstant()) { 1683 __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue()); 1684 } else { 1685 DCHECK(constant->IsDoubleConstant()); 1686 __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue()); 1687 } 1688 } 1689 1690 1691 static bool CoherentConstantAndType(Location constant, DataType::Type type) { 1692 DCHECK(constant.IsConstant()); 1693 HConstant* cst = constant.GetConstant(); 1694 return (cst->IsIntConstant() && type == DataType::Type::kInt32) || 1695 // Null is mapped to a core W register, which we associate with kPrimInt. 1696 (cst->IsNullConstant() && type == DataType::Type::kInt32) || 1697 (cst->IsLongConstant() && type == DataType::Type::kInt64) || 1698 (cst->IsFloatConstant() && type == DataType::Type::kFloat32) || 1699 (cst->IsDoubleConstant() && type == DataType::Type::kFloat64); 1700 } 1701 1702 // Allocate a scratch register from the VIXL pool, querying first 1703 // the floating-point register pool, and then the core register 1704 // pool. This is essentially a reimplementation of 1705 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize 1706 // using a different allocation strategy. 1707 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm, 1708 vixl::aarch64::UseScratchRegisterScope* temps, 1709 int size_in_bits) { 1710 return masm->GetScratchFPRegisterList()->IsEmpty() 1711 ? 
CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1712 : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1713 }
1714 
1715 void CodeGeneratorARM64::MoveLocation(Location destination,
1716 Location source,
1717 DataType::Type dst_type) {
1718 if (source.Equals(destination)) {
1719 return;
1720 }
1721 
1722 // A valid move can always be inferred from the destination and source
1723 // locations. When moving to or from a register, the argument type can be
1724 // used to generate 32bit instead of 64bit moves. In debug mode we also
1725 // check the coherency of the locations and the type.
1726 bool unspecified_type = (dst_type == DataType::Type::kVoid);
1727 
1728 if (destination.IsRegister() || destination.IsFpuRegister()) {
1729 if (unspecified_type) {
1730 HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1731 if (source.IsStackSlot() ||
1732 (src_cst != nullptr && (src_cst->IsIntConstant()
1733 || src_cst->IsFloatConstant()
1734 || src_cst->IsNullConstant()))) {
1735 // For stack slots and 32bit constants, a 32bit type is appropriate.
1736 dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1737 } else {
1738 // If the source is a double stack slot or a 64bit constant, a 64bit
1739 // type is appropriate. Else the source is a register, and since the
1740 // type has not been specified, we choose a 64bit type to force a 64bit
1741 // move.
1742 dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1743 }
1744 }
1745 DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1746 (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1747 CPURegister dst = CPURegisterFrom(destination, dst_type);
1748 if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1749 DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1750 __ Ldr(dst, StackOperandFrom(source));
1751 } else if (source.IsSIMDStackSlot()) {
1752 __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1753 } else if (source.IsConstant()) {
1754 DCHECK(CoherentConstantAndType(source, dst_type));
1755 MoveConstant(dst, source.GetConstant());
1756 } else if (source.IsRegister()) {
1757 if (destination.IsRegister()) {
1758 __ Mov(Register(dst), RegisterFrom(source, dst_type));
1759 } else {
1760 DCHECK(destination.IsFpuRegister());
1761 DataType::Type source_type = DataType::Is64BitType(dst_type)
1762 ? DataType::Type::kInt64
1763 : DataType::Type::kInt32;
1764 __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1765 }
1766 } else {
1767 DCHECK(source.IsFpuRegister());
1768 if (destination.IsRegister()) {
1769 DataType::Type source_type = DataType::Is64BitType(dst_type)
1770 ?
DataType::Type::kFloat64 1771 : DataType::Type::kFloat32; 1772 __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type)); 1773 } else { 1774 DCHECK(destination.IsFpuRegister()); 1775 if (GetGraph()->HasSIMD()) { 1776 __ Mov(QRegisterFrom(destination), QRegisterFrom(source)); 1777 } else { 1778 __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type)); 1779 } 1780 } 1781 } 1782 } else if (destination.IsSIMDStackSlot()) { 1783 if (source.IsFpuRegister()) { 1784 __ Str(QRegisterFrom(source), StackOperandFrom(destination)); 1785 } else { 1786 DCHECK(source.IsSIMDStackSlot()); 1787 UseScratchRegisterScope temps(GetVIXLAssembler()); 1788 if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) { 1789 Register temp = temps.AcquireX(); 1790 __ Ldr(temp, MemOperand(sp, source.GetStackIndex())); 1791 __ Str(temp, MemOperand(sp, destination.GetStackIndex())); 1792 __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize)); 1793 __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize)); 1794 } else { 1795 FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize); 1796 __ Ldr(temp, StackOperandFrom(source)); 1797 __ Str(temp, StackOperandFrom(destination)); 1798 } 1799 } 1800 } else { // The destination is not a register. It must be a stack slot. 1801 DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); 1802 if (source.IsRegister() || source.IsFpuRegister()) { 1803 if (unspecified_type) { 1804 if (source.IsRegister()) { 1805 dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; 1806 } else { 1807 dst_type = 1808 destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64; 1809 } 1810 } 1811 DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) && 1812 (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type))); 1813 __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination)); 1814 } else if (source.IsConstant()) { 1815 DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type)) 1816 << source << " " << dst_type; 1817 UseScratchRegisterScope temps(GetVIXLAssembler()); 1818 HConstant* src_cst = source.GetConstant(); 1819 CPURegister temp; 1820 if (src_cst->IsZeroBitPattern()) { 1821 temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) 1822 ? Register(xzr) 1823 : Register(wzr); 1824 } else { 1825 if (src_cst->IsIntConstant()) { 1826 temp = temps.AcquireW(); 1827 } else if (src_cst->IsLongConstant()) { 1828 temp = temps.AcquireX(); 1829 } else if (src_cst->IsFloatConstant()) { 1830 temp = temps.AcquireS(); 1831 } else { 1832 DCHECK(src_cst->IsDoubleConstant()); 1833 temp = temps.AcquireD(); 1834 } 1835 MoveConstant(temp, src_cst); 1836 } 1837 __ Str(temp, StackOperandFrom(destination)); 1838 } else { 1839 DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); 1840 DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot()); 1841 UseScratchRegisterScope temps(GetVIXLAssembler()); 1842 // Use any scratch register (a core or a floating-point one) 1843 // from VIXL scratch register pools as a temporary. 1844 // 1845 // We used to only use the FP scratch register pool, but in some 1846 // rare cases the only register from this pool (D31) would 1847 // already be used (e.g. within a ParallelMove instruction, when 1848 // a move is blocked by a another move requiring a scratch FP 1849 // register, which would reserve D31). 
To prevent this issue, we 1850 // ask for a scratch register of any type (core or FP). 1851 // 1852 // Also, we start by asking for a FP scratch register first, as the 1853 // demand of scratch core registers is higher. This is why we 1854 // use AcquireFPOrCoreCPURegisterOfSize instead of 1855 // UseScratchRegisterScope::AcquireCPURegisterOfSize, which 1856 // allocates core scratch registers first. 1857 CPURegister temp = AcquireFPOrCoreCPURegisterOfSize( 1858 GetVIXLAssembler(), 1859 &temps, 1860 (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize)); 1861 __ Ldr(temp, StackOperandFrom(source)); 1862 __ Str(temp, StackOperandFrom(destination)); 1863 } 1864 } 1865 } 1866 1867 void CodeGeneratorARM64::Load(DataType::Type type, 1868 CPURegister dst, 1869 const MemOperand& src) { 1870 switch (type) { 1871 case DataType::Type::kBool: 1872 case DataType::Type::kUint8: 1873 __ Ldrb(Register(dst), src); 1874 break; 1875 case DataType::Type::kInt8: 1876 __ Ldrsb(Register(dst), src); 1877 break; 1878 case DataType::Type::kUint16: 1879 __ Ldrh(Register(dst), src); 1880 break; 1881 case DataType::Type::kInt16: 1882 __ Ldrsh(Register(dst), src); 1883 break; 1884 case DataType::Type::kInt32: 1885 case DataType::Type::kReference: 1886 case DataType::Type::kInt64: 1887 case DataType::Type::kFloat32: 1888 case DataType::Type::kFloat64: 1889 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); 1890 __ Ldr(dst, src); 1891 break; 1892 case DataType::Type::kUint32: 1893 case DataType::Type::kUint64: 1894 case DataType::Type::kVoid: 1895 LOG(FATAL) << "Unreachable type " << type; 1896 } 1897 } 1898 1899 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, 1900 CPURegister dst, 1901 const MemOperand& src, 1902 bool needs_null_check) { 1903 MacroAssembler* masm = GetVIXLAssembler(); 1904 UseScratchRegisterScope temps(masm); 1905 Register temp_base = temps.AcquireX(); 1906 DataType::Type type = instruction->GetType(); 1907 1908 DCHECK(!src.IsPreIndex()); 1909 DCHECK(!src.IsPostIndex()); 1910 1911 // TODO(vixl): Let the MacroAssembler handle MemOperand. 1912 __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src)); 1913 { 1914 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
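// Note: the acquiring byte/halfword loads used below (ldarb/ldarh) zero-extend the result, so for the signed kInt8/kInt16 types the value is sign-extended afterwards with Sbfx.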
1915 MemOperand base = MemOperand(temp_base); 1916 switch (type) { 1917 case DataType::Type::kBool: 1918 case DataType::Type::kUint8: 1919 case DataType::Type::kInt8: 1920 { 1921 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1922 __ ldarb(Register(dst), base); 1923 if (needs_null_check) { 1924 MaybeRecordImplicitNullCheck(instruction); 1925 } 1926 } 1927 if (type == DataType::Type::kInt8) { 1928 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte); 1929 } 1930 break; 1931 case DataType::Type::kUint16: 1932 case DataType::Type::kInt16: 1933 { 1934 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1935 __ ldarh(Register(dst), base); 1936 if (needs_null_check) { 1937 MaybeRecordImplicitNullCheck(instruction); 1938 } 1939 } 1940 if (type == DataType::Type::kInt16) { 1941 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte); 1942 } 1943 break; 1944 case DataType::Type::kInt32: 1945 case DataType::Type::kReference: 1946 case DataType::Type::kInt64: 1947 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); 1948 { 1949 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1950 __ ldar(Register(dst), base); 1951 if (needs_null_check) { 1952 MaybeRecordImplicitNullCheck(instruction); 1953 } 1954 } 1955 break; 1956 case DataType::Type::kFloat32: 1957 case DataType::Type::kFloat64: { 1958 DCHECK(dst.IsFPRegister()); 1959 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); 1960 1961 Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); 1962 { 1963 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1964 __ ldar(temp, base); 1965 if (needs_null_check) { 1966 MaybeRecordImplicitNullCheck(instruction); 1967 } 1968 } 1969 __ Fmov(FPRegister(dst), temp); 1970 break; 1971 } 1972 case DataType::Type::kUint32: 1973 case DataType::Type::kUint64: 1974 case DataType::Type::kVoid: 1975 LOG(FATAL) << "Unreachable type " << type; 1976 } 1977 } 1978 } 1979 1980 void CodeGeneratorARM64::Store(DataType::Type type, 1981 CPURegister src, 1982 const MemOperand& dst) { 1983 switch (type) { 1984 case DataType::Type::kBool: 1985 case DataType::Type::kUint8: 1986 case DataType::Type::kInt8: 1987 __ Strb(Register(src), dst); 1988 break; 1989 case DataType::Type::kUint16: 1990 case DataType::Type::kInt16: 1991 __ Strh(Register(src), dst); 1992 break; 1993 case DataType::Type::kInt32: 1994 case DataType::Type::kReference: 1995 case DataType::Type::kInt64: 1996 case DataType::Type::kFloat32: 1997 case DataType::Type::kFloat64: 1998 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 1999 __ Str(src, dst); 2000 break; 2001 case DataType::Type::kUint32: 2002 case DataType::Type::kUint64: 2003 case DataType::Type::kVoid: 2004 LOG(FATAL) << "Unreachable type " << type; 2005 } 2006 } 2007 2008 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, 2009 DataType::Type type, 2010 CPURegister src, 2011 const MemOperand& dst, 2012 bool needs_null_check) { 2013 MacroAssembler* masm = GetVIXLAssembler(); 2014 UseScratchRegisterScope temps(GetVIXLAssembler()); 2015 Register temp_base = temps.AcquireX(); 2016 2017 DCHECK(!dst.IsPreIndex()); 2018 DCHECK(!dst.IsPostIndex()); 2019 2020 // TODO(vixl): Let the MacroAssembler handle this. 
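// The store-release instructions (stlrb/stlrh/stlr) only accept a plain base-register address, so the full address is first materialized in temp_base.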
2021 Operand op = OperandFromMemOperand(dst); 2022 __ Add(temp_base, dst.GetBaseRegister(), op); 2023 MemOperand base = MemOperand(temp_base); 2024 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2025 switch (type) { 2026 case DataType::Type::kBool: 2027 case DataType::Type::kUint8: 2028 case DataType::Type::kInt8: 2029 { 2030 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2031 __ stlrb(Register(src), base); 2032 if (needs_null_check) { 2033 MaybeRecordImplicitNullCheck(instruction); 2034 } 2035 } 2036 break; 2037 case DataType::Type::kUint16: 2038 case DataType::Type::kInt16: 2039 { 2040 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2041 __ stlrh(Register(src), base); 2042 if (needs_null_check) { 2043 MaybeRecordImplicitNullCheck(instruction); 2044 } 2045 } 2046 break; 2047 case DataType::Type::kInt32: 2048 case DataType::Type::kReference: 2049 case DataType::Type::kInt64: 2050 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 2051 { 2052 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2053 __ stlr(Register(src), base); 2054 if (needs_null_check) { 2055 MaybeRecordImplicitNullCheck(instruction); 2056 } 2057 } 2058 break; 2059 case DataType::Type::kFloat32: 2060 case DataType::Type::kFloat64: { 2061 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 2062 Register temp_src; 2063 if (src.IsZero()) { 2064 // The zero register is used to avoid synthesizing zero constants. 2065 temp_src = Register(src); 2066 } else { 2067 DCHECK(src.IsFPRegister()); 2068 temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); 2069 __ Fmov(temp_src, FPRegister(src)); 2070 } 2071 { 2072 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2073 __ stlr(temp_src, base); 2074 if (needs_null_check) { 2075 MaybeRecordImplicitNullCheck(instruction); 2076 } 2077 } 2078 break; 2079 } 2080 case DataType::Type::kUint32: 2081 case DataType::Type::kUint64: 2082 case DataType::Type::kVoid: 2083 LOG(FATAL) << "Unreachable type " << type; 2084 } 2085 } 2086 2087 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint, 2088 HInstruction* instruction, 2089 uint32_t dex_pc, 2090 SlowPathCode* slow_path) { 2091 ValidateInvokeRuntime(entrypoint, instruction, slow_path); 2092 2093 __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value())); 2094 { 2095 // Ensure the pc position is recorded immediately after the `blr` instruction. 
2096 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 2097 __ blr(lr); 2098 if (EntrypointRequiresStackMap(entrypoint)) { 2099 RecordPcInfo(instruction, dex_pc, slow_path); 2100 } 2101 } 2102 } 2103 2104 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, 2105 HInstruction* instruction, 2106 SlowPathCode* slow_path) { 2107 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); 2108 __ Ldr(lr, MemOperand(tr, entry_point_offset)); 2109 __ Blr(lr); 2110 } 2111 2112 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, 2113 Register class_reg) { 2114 UseScratchRegisterScope temps(GetVIXLAssembler()); 2115 Register temp = temps.AcquireW(); 2116 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 2117 const size_t status_byte_offset = 2118 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 2119 constexpr uint32_t shifted_initialized_value = 2120 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); 2121 2122 // Even if the initialized flag is set, we need to ensure consistent memory ordering. 2123 // TODO(vixl): Let the MacroAssembler handle MemOperand. 2124 __ Add(temp, class_reg, status_byte_offset); 2125 __ Ldarb(temp, HeapOperand(temp)); 2126 __ Cmp(temp, shifted_initialized_value); 2127 __ B(lo, slow_path->GetEntryLabel()); 2128 __ Bind(slow_path->GetExitLabel()); 2129 } 2130 2131 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { 2132 BarrierType type = BarrierAll; 2133 2134 switch (kind) { 2135 case MemBarrierKind::kAnyAny: 2136 case MemBarrierKind::kAnyStore: { 2137 type = BarrierAll; 2138 break; 2139 } 2140 case MemBarrierKind::kLoadAny: { 2141 type = BarrierReads; 2142 break; 2143 } 2144 case MemBarrierKind::kStoreStore: { 2145 type = BarrierWrites; 2146 break; 2147 } 2148 default: 2149 LOG(FATAL) << "Unexpected memory barrier " << kind; 2150 } 2151 __ Dmb(InnerShareable, type); 2152 } 2153 2154 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, 2155 HBasicBlock* successor) { 2156 SuspendCheckSlowPathARM64* slow_path = 2157 down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); 2158 if (slow_path == nullptr) { 2159 slow_path = 2160 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor); 2161 instruction->SetSlowPath(slow_path); 2162 codegen_->AddSlowPath(slow_path); 2163 if (successor != nullptr) { 2164 DCHECK(successor->IsLoopHeader()); 2165 } 2166 } else { 2167 DCHECK_EQ(slow_path->GetSuccessor(), successor); 2168 } 2169 2170 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); 2171 Register temp = temps.AcquireW(); 2172 2173 __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue())); 2174 if (successor == nullptr) { 2175 __ Cbnz(temp, slow_path->GetEntryLabel()); 2176 __ Bind(slow_path->GetReturnLabel()); 2177 } else { 2178 __ Cbz(temp, codegen_->GetLabelOf(successor)); 2179 __ B(slow_path->GetEntryLabel()); 2180 // slow_path will return to GetLabelOf(successor). 
2181 } 2182 } 2183 2184 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, 2185 CodeGeneratorARM64* codegen) 2186 : InstructionCodeGenerator(graph, codegen), 2187 assembler_(codegen->GetAssembler()), 2188 codegen_(codegen) {} 2189 2190 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { 2191 DCHECK_EQ(instr->InputCount(), 2U); 2192 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 2193 DataType::Type type = instr->GetResultType(); 2194 switch (type) { 2195 case DataType::Type::kInt32: 2196 case DataType::Type::kInt64: 2197 locations->SetInAt(0, Location::RequiresRegister()); 2198 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr)); 2199 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2200 break; 2201 2202 case DataType::Type::kFloat32: 2203 case DataType::Type::kFloat64: 2204 locations->SetInAt(0, Location::RequiresFpuRegister()); 2205 locations->SetInAt(1, Location::RequiresFpuRegister()); 2206 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2207 break; 2208 2209 default: 2210 LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type; 2211 } 2212 } 2213 2214 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, 2215 const FieldInfo& field_info) { 2216 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 2217 2218 bool object_field_get_with_read_barrier = 2219 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 2220 LocationSummary* locations = 2221 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 2222 object_field_get_with_read_barrier 2223 ? LocationSummary::kCallOnSlowPath 2224 : LocationSummary::kNoCall); 2225 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 2226 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 2227 // We need a temporary register for the read barrier marking slow 2228 // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier. 2229 if (kBakerReadBarrierLinkTimeThunksEnableForFields && 2230 !Runtime::Current()->UseJitCompilation() && 2231 !field_info.IsVolatile()) { 2232 // If link-time thunks for the Baker read barrier are enabled, for AOT 2233 // non-volatile loads we need a temporary only if the offset is too big. 2234 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { 2235 locations->AddTemp(FixedTempLocation()); 2236 } 2237 } else { 2238 locations->AddTemp(Location::RequiresRegister()); 2239 } 2240 } 2241 locations->SetInAt(0, Location::RequiresRegister()); 2242 if (DataType::IsFloatingPointType(instruction->GetType())) { 2243 locations->SetOut(Location::RequiresFpuRegister()); 2244 } else { 2245 // The output overlaps for an object field get when read barriers 2246 // are enabled: we do not want the load to overwrite the object's 2247 // location, as we need it to emit the read barrier. 2248 locations->SetOut( 2249 Location::RequiresRegister(), 2250 object_field_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 2251 } 2252 } 2253 2254 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, 2255 const FieldInfo& field_info) { 2256 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 2257 LocationSummary* locations = instruction->GetLocations(); 2258 Location base_loc = locations->InAt(0); 2259 Location out = locations->Out(); 2260 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 2261 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); 2262 DataType::Type load_type = instruction->GetType(); 2263 MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); 2264 2265 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && 2266 load_type == DataType::Type::kReference) { 2267 // Object FieldGet with Baker's read barrier case. 2268 // /* HeapReference<Object> */ out = *(base + offset) 2269 Register base = RegisterFrom(base_loc, DataType::Type::kReference); 2270 Location maybe_temp = 2271 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); 2272 // Note that potential implicit null checks are handled in this 2273 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call. 2274 codegen_->GenerateFieldLoadWithBakerReadBarrier( 2275 instruction, 2276 out, 2277 base, 2278 offset, 2279 maybe_temp, 2280 /* needs_null_check */ true, 2281 field_info.IsVolatile()); 2282 } else { 2283 // General case. 2284 if (field_info.IsVolatile()) { 2285 // Note that a potential implicit null check is handled in this 2286 // CodeGeneratorARM64::LoadAcquire call. 2287 // NB: LoadAcquire will record the pc info if needed. 2288 codegen_->LoadAcquire( 2289 instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true); 2290 } else { 2291 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2292 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2293 codegen_->Load(load_type, OutputCPURegister(instruction), field); 2294 codegen_->MaybeRecordImplicitNullCheck(instruction); 2295 } 2296 if (load_type == DataType::Type::kReference) { 2297 // If read barriers are enabled, emit read barriers other than 2298 // Baker's using a slow path (and also unpoison the loaded 2299 // reference, if heap poisoning is enabled). 
2300 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); 2301 } 2302 } 2303 } 2304 2305 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { 2306 LocationSummary* locations = 2307 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2308 locations->SetInAt(0, Location::RequiresRegister()); 2309 if (IsConstantZeroBitPattern(instruction->InputAt(1))) { 2310 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 2311 } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { 2312 locations->SetInAt(1, Location::RequiresFpuRegister()); 2313 } else { 2314 locations->SetInAt(1, Location::RequiresRegister()); 2315 } 2316 } 2317 2318 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, 2319 const FieldInfo& field_info, 2320 bool value_can_be_null) { 2321 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 2322 2323 Register obj = InputRegisterAt(instruction, 0); 2324 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1); 2325 CPURegister source = value; 2326 Offset offset = field_info.GetFieldOffset(); 2327 DataType::Type field_type = field_info.GetFieldType(); 2328 2329 { 2330 // We use a block to end the scratch scope before the write barrier, thus 2331 // freeing the temporary registers so they can be used in `MarkGCCard`. 2332 UseScratchRegisterScope temps(GetVIXLAssembler()); 2333 2334 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 2335 DCHECK(value.IsW()); 2336 Register temp = temps.AcquireW(); 2337 __ Mov(temp, value.W()); 2338 GetAssembler()->PoisonHeapReference(temp.W()); 2339 source = temp; 2340 } 2341 2342 if (field_info.IsVolatile()) { 2343 codegen_->StoreRelease( 2344 instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true); 2345 } else { 2346 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2347 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2348 codegen_->Store(field_type, source, HeapOperand(obj, offset)); 2349 codegen_->MaybeRecordImplicitNullCheck(instruction); 2350 } 2351 } 2352 2353 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 2354 codegen_->MarkGCCard(obj, Register(value), value_can_be_null); 2355 } 2356 } 2357 2358 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { 2359 DataType::Type type = instr->GetType(); 2360 2361 switch (type) { 2362 case DataType::Type::kInt32: 2363 case DataType::Type::kInt64: { 2364 Register dst = OutputRegister(instr); 2365 Register lhs = InputRegisterAt(instr, 0); 2366 Operand rhs = InputOperandAt(instr, 1); 2367 if (instr->IsAdd()) { 2368 __ Add(dst, lhs, rhs); 2369 } else if (instr->IsAnd()) { 2370 __ And(dst, lhs, rhs); 2371 } else if (instr->IsOr()) { 2372 __ Orr(dst, lhs, rhs); 2373 } else if (instr->IsSub()) { 2374 __ Sub(dst, lhs, rhs); 2375 } else if (instr->IsRor()) { 2376 if (rhs.IsImmediate()) { 2377 uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1); 2378 __ Ror(dst, lhs, shift); 2379 } else { 2380 // Ensure shift distance is in the same size register as the result. If 2381 // we are rotating a long and the shift comes in a w register originally, 2382 // we don't need to sxtw for use as an x since the shift distances are 2383 // all & reg_bits - 1. 
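// (The ror instruction itself masks the rotate amount to the register width, i.e. the distance is taken modulo reg_bits.)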
2384 __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type)); 2385 } 2386 } else { 2387 DCHECK(instr->IsXor()); 2388 __ Eor(dst, lhs, rhs); 2389 } 2390 break; 2391 } 2392 case DataType::Type::kFloat32: 2393 case DataType::Type::kFloat64: { 2394 FPRegister dst = OutputFPRegister(instr); 2395 FPRegister lhs = InputFPRegisterAt(instr, 0); 2396 FPRegister rhs = InputFPRegisterAt(instr, 1); 2397 if (instr->IsAdd()) { 2398 __ Fadd(dst, lhs, rhs); 2399 } else if (instr->IsSub()) { 2400 __ Fsub(dst, lhs, rhs); 2401 } else { 2402 LOG(FATAL) << "Unexpected floating-point binary operation"; 2403 } 2404 break; 2405 } 2406 default: 2407 LOG(FATAL) << "Unexpected binary operation type " << type; 2408 } 2409 } 2410 2411 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) { 2412 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); 2413 2414 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 2415 DataType::Type type = instr->GetResultType(); 2416 switch (type) { 2417 case DataType::Type::kInt32: 2418 case DataType::Type::kInt64: { 2419 locations->SetInAt(0, Location::RequiresRegister()); 2420 locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); 2421 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2422 break; 2423 } 2424 default: 2425 LOG(FATAL) << "Unexpected shift type " << type; 2426 } 2427 } 2428 2429 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) { 2430 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); 2431 2432 DataType::Type type = instr->GetType(); 2433 switch (type) { 2434 case DataType::Type::kInt32: 2435 case DataType::Type::kInt64: { 2436 Register dst = OutputRegister(instr); 2437 Register lhs = InputRegisterAt(instr, 0); 2438 Operand rhs = InputOperandAt(instr, 1); 2439 if (rhs.IsImmediate()) { 2440 uint32_t shift_value = rhs.GetImmediate() & 2441 (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance); 2442 if (instr->IsShl()) { 2443 __ Lsl(dst, lhs, shift_value); 2444 } else if (instr->IsShr()) { 2445 __ Asr(dst, lhs, shift_value); 2446 } else { 2447 __ Lsr(dst, lhs, shift_value); 2448 } 2449 } else { 2450 Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W(); 2451 2452 if (instr->IsShl()) { 2453 __ Lsl(dst, lhs, rhs_reg); 2454 } else if (instr->IsShr()) { 2455 __ Asr(dst, lhs, rhs_reg); 2456 } else { 2457 __ Lsr(dst, lhs, rhs_reg); 2458 } 2459 } 2460 break; 2461 } 2462 default: 2463 LOG(FATAL) << "Unexpected shift operation type " << type; 2464 } 2465 } 2466 2467 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) { 2468 HandleBinaryOp(instruction); 2469 } 2470 2471 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) { 2472 HandleBinaryOp(instruction); 2473 } 2474 2475 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) { 2476 HandleBinaryOp(instruction); 2477 } 2478 2479 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { 2480 HandleBinaryOp(instruction); 2481 } 2482 2483 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { 2484 DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType(); 2485 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 2486 locations->SetInAt(0, Location::RequiresRegister()); 2487 // There is no immediate variant of negated bitwise instructions in AArch64. 
2488 locations->SetInAt(1, Location::RequiresRegister()); 2489 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2490 } 2491 2492 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { 2493 Register dst = OutputRegister(instr); 2494 Register lhs = InputRegisterAt(instr, 0); 2495 Register rhs = InputRegisterAt(instr, 1); 2496 2497 switch (instr->GetOpKind()) { 2498 case HInstruction::kAnd: 2499 __ Bic(dst, lhs, rhs); 2500 break; 2501 case HInstruction::kOr: 2502 __ Orn(dst, lhs, rhs); 2503 break; 2504 case HInstruction::kXor: 2505 __ Eon(dst, lhs, rhs); 2506 break; 2507 default: 2508 LOG(FATAL) << "Unreachable"; 2509 } 2510 } 2511 2512 void LocationsBuilderARM64::VisitDataProcWithShifterOp( 2513 HDataProcWithShifterOp* instruction) { 2514 DCHECK(instruction->GetType() == DataType::Type::kInt32 || 2515 instruction->GetType() == DataType::Type::kInt64); 2516 LocationSummary* locations = 2517 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2518 if (instruction->GetInstrKind() == HInstruction::kNeg) { 2519 locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); 2520 } else { 2521 locations->SetInAt(0, Location::RequiresRegister()); 2522 } 2523 locations->SetInAt(1, Location::RequiresRegister()); 2524 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2525 } 2526 2527 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp( 2528 HDataProcWithShifterOp* instruction) { 2529 DataType::Type type = instruction->GetType(); 2530 HInstruction::InstructionKind kind = instruction->GetInstrKind(); 2531 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 2532 Register out = OutputRegister(instruction); 2533 Register left; 2534 if (kind != HInstruction::kNeg) { 2535 left = InputRegisterAt(instruction, 0); 2536 } 2537 // If this `HDataProcWithShifterOp` was created by merging a type conversion as the 2538 // shifter operand operation, the IR generating `right_reg` (input to the type 2539 // conversion) can have a different type from the current instruction's type, 2540 // so we manually indicate the type. 2541 Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); 2542 Operand right_operand(0); 2543 2544 HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); 2545 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { 2546 right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); 2547 } else { 2548 right_operand = Operand(right_reg, 2549 helpers::ShiftFromOpKind(op_kind), 2550 instruction->GetShiftAmount()); 2551 } 2552 2553 // Logical binary operations do not support extension operations in the 2554 // operand. Note that VIXL would still manage if it was passed by generating 2555 // the extension as a separate instruction. 2556 // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. 
2557 DCHECK(!right_operand.IsExtendedRegister() ||
2558 (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2559 kind != HInstruction::kNeg));
2560 switch (kind) {
2561 case HInstruction::kAdd:
2562 __ Add(out, left, right_operand);
2563 break;
2564 case HInstruction::kAnd:
2565 __ And(out, left, right_operand);
2566 break;
2567 case HInstruction::kNeg:
2568 DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2569 __ Neg(out, right_operand);
2570 break;
2571 case HInstruction::kOr:
2572 __ Orr(out, left, right_operand);
2573 break;
2574 case HInstruction::kSub:
2575 __ Sub(out, left, right_operand);
2576 break;
2577 case HInstruction::kXor:
2578 __ Eor(out, left, right_operand);
2579 break;
2580 default:
2581 LOG(FATAL) << "Unexpected operation kind: " << kind;
2582 UNREACHABLE();
2583 }
2584 }
2585 
2586 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2587 LocationSummary* locations =
2588 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2589 locations->SetInAt(0, Location::RequiresRegister());
2590 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2591 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2592 }
2593 
2594 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2595 __ Add(OutputRegister(instruction),
2596 InputRegisterAt(instruction, 0),
2597 Operand(InputOperandAt(instruction, 1)));
2598 }
2599 
2600 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2601 LocationSummary* locations =
2602 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2603 
2604 HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2605 
2606 locations->SetInAt(0, Location::RequiresRegister());
2607 // For the byte case we don't need to shift the index variable, so we can encode the data offset
2608 // into the ADD instruction. For other cases we prefer the data_offset to be in a register; that
2609 // will hoist the data offset constant generation out of the loop and reduce the critical path
2610 // length in the loop.
2611 locations->SetInAt(1, shift->GetValue() == 0
2612 ?
Location::ConstantLocation(instruction->GetOffset()->AsIntConstant()) 2613 : Location::RequiresRegister()); 2614 locations->SetInAt(2, Location::ConstantLocation(shift)); 2615 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2616 } 2617 2618 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( 2619 HIntermediateAddressIndex* instruction) { 2620 Register index_reg = InputRegisterAt(instruction, 0); 2621 uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2)); 2622 uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); 2623 2624 if (shift == 0) { 2625 __ Add(OutputRegister(instruction), index_reg, offset); 2626 } else { 2627 Register offset_reg = InputRegisterAt(instruction, 1); 2628 __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift)); 2629 } 2630 } 2631 2632 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { 2633 LocationSummary* locations = 2634 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall); 2635 HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); 2636 if (instr->GetOpKind() == HInstruction::kSub && 2637 accumulator->IsConstant() && 2638 accumulator->AsConstant()->IsArithmeticZero()) { 2639 // Don't allocate register for Mneg instruction. 2640 } else { 2641 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, 2642 Location::RequiresRegister()); 2643 } 2644 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); 2645 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); 2646 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2647 } 2648 2649 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { 2650 Register res = OutputRegister(instr); 2651 Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex); 2652 Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex); 2653 2654 // Avoid emitting code that could trigger Cortex A53's erratum 835769. 2655 // This fixup should be carried out for all multiply-accumulate instructions: 2656 // madd, msub, smaddl, smsubl, umaddl and umsubl. 2657 if (instr->GetType() == DataType::Type::kInt64 && 2658 codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { 2659 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); 2660 vixl::aarch64::Instruction* prev = 2661 masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize; 2662 if (prev->IsLoadOrStore()) { 2663 // Make sure we emit only exactly one nop. 
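// (Erratum 835769 may be triggered when a 64-bit multiply-accumulate directly follows a load or store; the single nop breaks up that sequence.)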
2664 ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2665 __ nop(); 2666 } 2667 } 2668 2669 if (instr->GetOpKind() == HInstruction::kAdd) { 2670 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); 2671 __ Madd(res, mul_left, mul_right, accumulator); 2672 } else { 2673 DCHECK(instr->GetOpKind() == HInstruction::kSub); 2674 HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); 2675 if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) { 2676 __ Mneg(res, mul_left, mul_right); 2677 } else { 2678 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); 2679 __ Msub(res, mul_left, mul_right, accumulator); 2680 } 2681 } 2682 } 2683 2684 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { 2685 bool object_array_get_with_read_barrier = 2686 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 2687 LocationSummary* locations = 2688 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 2689 object_array_get_with_read_barrier 2690 ? LocationSummary::kCallOnSlowPath 2691 : LocationSummary::kNoCall); 2692 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 2693 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 2694 // We need a temporary register for the read barrier marking slow 2695 // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier. 2696 if (kBakerReadBarrierLinkTimeThunksEnableForFields && 2697 !Runtime::Current()->UseJitCompilation() && 2698 instruction->GetIndex()->IsConstant()) { 2699 // Array loads with constant index are treated as field loads. 2700 // If link-time thunks for the Baker read barrier are enabled, for AOT 2701 // constant index loads we need a temporary only if the offset is too big. 2702 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 2703 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); 2704 offset += index << DataType::SizeShift(DataType::Type::kReference); 2705 if (offset >= kReferenceLoadMinFarOffset) { 2706 locations->AddTemp(FixedTempLocation()); 2707 } 2708 } else { 2709 locations->AddTemp(Location::RequiresRegister()); 2710 } 2711 } 2712 locations->SetInAt(0, Location::RequiresRegister()); 2713 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 2714 if (DataType::IsFloatingPointType(instruction->GetType())) { 2715 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2716 } else { 2717 // The output overlaps in the case of an object array get with 2718 // read barriers enabled: we do not want the move to overwrite the 2719 // array's location, as we need it to emit the read barrier. 2720 locations->SetOut( 2721 Location::RequiresRegister(), 2722 object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 2723 } 2724 } 2725 2726 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { 2727 DataType::Type type = instruction->GetType(); 2728 Register obj = InputRegisterAt(instruction, 0); 2729 LocationSummary* locations = instruction->GetLocations(); 2730 Location index = locations->InAt(1); 2731 Location out = locations->Out(); 2732 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 2733 const bool maybe_compressed_char_at = mirror::kUseStringCompression && 2734 instruction->IsStringCharAt(); 2735 MacroAssembler* masm = GetVIXLAssembler(); 2736 UseScratchRegisterScope temps(masm); 2737 2738 // The read barrier instrumentation of object ArrayGet instructions 2739 // does not support the HIntermediateAddress instruction. 2740 DCHECK(!((type == DataType::Type::kReference) && 2741 instruction->GetArray()->IsIntermediateAddress() && 2742 kEmitCompilerReadBarrier)); 2743 2744 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2745 // Object ArrayGet with Baker's read barrier case. 2746 // Note that a potential implicit null check is handled in the 2747 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. 2748 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); 2749 if (index.IsConstant()) { 2750 // Array load with a constant index can be treated as a field load. 2751 offset += Int64ConstantFrom(index) << DataType::SizeShift(type); 2752 Location maybe_temp = 2753 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); 2754 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, 2755 out, 2756 obj.W(), 2757 offset, 2758 maybe_temp, 2759 /* needs_null_check */ false, 2760 /* use_load_acquire */ false); 2761 } else { 2762 Register temp = WRegisterFrom(locations->GetTemp(0)); 2763 codegen_->GenerateArrayLoadWithBakerReadBarrier( 2764 instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false); 2765 } 2766 } else { 2767 // General case. 2768 MemOperand source = HeapOperand(obj); 2769 Register length; 2770 if (maybe_compressed_char_at) { 2771 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 2772 length = temps.AcquireW(); 2773 { 2774 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2775 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2776 2777 if (instruction->GetArray()->IsIntermediateAddress()) { 2778 DCHECK_LT(count_offset, offset); 2779 int64_t adjusted_offset = 2780 static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset); 2781 // Note that `adjusted_offset` is negative, so this will be a LDUR. 
2782 __ Ldr(length, MemOperand(obj.X(), adjusted_offset)); 2783 } else { 2784 __ Ldr(length, HeapOperand(obj, count_offset)); 2785 } 2786 codegen_->MaybeRecordImplicitNullCheck(instruction); 2787 } 2788 } 2789 if (index.IsConstant()) { 2790 if (maybe_compressed_char_at) { 2791 vixl::aarch64::Label uncompressed_load, done; 2792 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 2793 "Expecting 0=compressed, 1=uncompressed"); 2794 __ Tbnz(length.W(), 0, &uncompressed_load); 2795 __ Ldrb(Register(OutputCPURegister(instruction)), 2796 HeapOperand(obj, offset + Int64ConstantFrom(index))); 2797 __ B(&done); 2798 __ Bind(&uncompressed_load); 2799 __ Ldrh(Register(OutputCPURegister(instruction)), 2800 HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); 2801 __ Bind(&done); 2802 } else { 2803 offset += Int64ConstantFrom(index) << DataType::SizeShift(type); 2804 source = HeapOperand(obj, offset); 2805 } 2806 } else { 2807 Register temp = temps.AcquireSameSizeAs(obj); 2808 if (instruction->GetArray()->IsIntermediateAddress()) { 2809 // We do not need to compute the intermediate address from the array: the 2810 // input instruction has done it already. See the comment in 2811 // `TryExtractArrayAccessAddress()`. 2812 if (kIsDebugBuild) { 2813 HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); 2814 DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); 2815 } 2816 temp = obj; 2817 } else { 2818 __ Add(temp, obj, offset); 2819 } 2820 if (maybe_compressed_char_at) { 2821 vixl::aarch64::Label uncompressed_load, done; 2822 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 2823 "Expecting 0=compressed, 1=uncompressed"); 2824 __ Tbnz(length.W(), 0, &uncompressed_load); 2825 __ Ldrb(Register(OutputCPURegister(instruction)), 2826 HeapOperand(temp, XRegisterFrom(index), LSL, 0)); 2827 __ B(&done); 2828 __ Bind(&uncompressed_load); 2829 __ Ldrh(Register(OutputCPURegister(instruction)), 2830 HeapOperand(temp, XRegisterFrom(index), LSL, 1)); 2831 __ Bind(&done); 2832 } else { 2833 source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type)); 2834 } 2835 } 2836 if (!maybe_compressed_char_at) { 2837 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
2838 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2839 codegen_->Load(type, OutputCPURegister(instruction), source); 2840 codegen_->MaybeRecordImplicitNullCheck(instruction); 2841 } 2842 2843 if (type == DataType::Type::kReference) { 2844 static_assert( 2845 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 2846 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 2847 Location obj_loc = locations->InAt(0); 2848 if (index.IsConstant()) { 2849 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset); 2850 } else { 2851 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index); 2852 } 2853 } 2854 } 2855 } 2856 2857 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { 2858 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 2859 locations->SetInAt(0, Location::RequiresRegister()); 2860 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2861 } 2862 2863 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { 2864 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); 2865 vixl::aarch64::Register out = OutputRegister(instruction); 2866 { 2867 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2868 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2869 __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset)); 2870 codegen_->MaybeRecordImplicitNullCheck(instruction); 2871 } 2872 // Mask out compression flag from String's array length. 2873 if (mirror::kUseStringCompression && instruction->IsStringLength()) { 2874 __ Lsr(out.W(), out.W(), 1u); 2875 } 2876 } 2877 2878 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { 2879 DataType::Type value_type = instruction->GetComponentType(); 2880 2881 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 2882 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 2883 instruction, 2884 may_need_runtime_call_for_type_check ? 
2885 LocationSummary::kCallOnSlowPath : 2886 LocationSummary::kNoCall); 2887 locations->SetInAt(0, Location::RequiresRegister()); 2888 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 2889 if (IsConstantZeroBitPattern(instruction->InputAt(2))) { 2890 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 2891 } else if (DataType::IsFloatingPointType(value_type)) { 2892 locations->SetInAt(2, Location::RequiresFpuRegister()); 2893 } else { 2894 locations->SetInAt(2, Location::RequiresRegister()); 2895 } 2896 } 2897 2898 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { 2899 DataType::Type value_type = instruction->GetComponentType(); 2900 LocationSummary* locations = instruction->GetLocations(); 2901 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 2902 bool needs_write_barrier = 2903 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 2904 2905 Register array = InputRegisterAt(instruction, 0); 2906 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2); 2907 CPURegister source = value; 2908 Location index = locations->InAt(1); 2909 size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); 2910 MemOperand destination = HeapOperand(array); 2911 MacroAssembler* masm = GetVIXLAssembler(); 2912 2913 if (!needs_write_barrier) { 2914 DCHECK(!may_need_runtime_call_for_type_check); 2915 if (index.IsConstant()) { 2916 offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); 2917 destination = HeapOperand(array, offset); 2918 } else { 2919 UseScratchRegisterScope temps(masm); 2920 Register temp = temps.AcquireSameSizeAs(array); 2921 if (instruction->GetArray()->IsIntermediateAddress()) { 2922 // We do not need to compute the intermediate address from the array: the 2923 // input instruction has done it already. See the comment in 2924 // `TryExtractArrayAccessAddress()`. 2925 if (kIsDebugBuild) { 2926 HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); 2927 DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); 2928 } 2929 temp = array; 2930 } else { 2931 __ Add(temp, array, offset); 2932 } 2933 destination = HeapOperand(temp, 2934 XRegisterFrom(index), 2935 LSL, 2936 DataType::SizeShift(value_type)); 2937 } 2938 { 2939 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2940 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2941 codegen_->Store(value_type, value, destination); 2942 codegen_->MaybeRecordImplicitNullCheck(instruction); 2943 } 2944 } else { 2945 DCHECK(!instruction->GetArray()->IsIntermediateAddress()); 2946 vixl::aarch64::Label done; 2947 SlowPathCodeARM64* slow_path = nullptr; 2948 { 2949 // We use a block to end the scratch scope before the write barrier, thus 2950 // freeing the temporary registers so they can be used in `MarkGCCard`. 
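      // Note: in the non-constant index case, `destination` is formed from `temp` before
      // `temp` actually holds `array + offset`; the `Add(temp, array, offset)` is only
      // emitted right before each store that uses `destination`.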
2951 UseScratchRegisterScope temps(masm); 2952 Register temp = temps.AcquireSameSizeAs(array); 2953 if (index.IsConstant()) { 2954 offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); 2955 destination = HeapOperand(array, offset); 2956 } else { 2957 destination = HeapOperand(temp, 2958 XRegisterFrom(index), 2959 LSL, 2960 DataType::SizeShift(value_type)); 2961 } 2962 2963 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2964 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2965 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2966 2967 if (may_need_runtime_call_for_type_check) { 2968 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction); 2969 codegen_->AddSlowPath(slow_path); 2970 if (instruction->GetValueCanBeNull()) { 2971 vixl::aarch64::Label non_zero; 2972 __ Cbnz(Register(value), &non_zero); 2973 if (!index.IsConstant()) { 2974 __ Add(temp, array, offset); 2975 } 2976 { 2977 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools 2978 // emitted. 2979 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2980 __ Str(wzr, destination); 2981 codegen_->MaybeRecordImplicitNullCheck(instruction); 2982 } 2983 __ B(&done); 2984 __ Bind(&non_zero); 2985 } 2986 2987 // Note that when Baker read barriers are enabled, the type 2988 // checks are performed without read barriers. This is fine, 2989 // even in the case where a class object is in the from-space 2990 // after the flip, as a comparison involving such a type would 2991 // not produce a false positive; it may of course produce a 2992 // false negative, in which case we would take the ArraySet 2993 // slow path. 2994 2995 Register temp2 = temps.AcquireSameSizeAs(array); 2996 // /* HeapReference<Class> */ temp = array->klass_ 2997 { 2998 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2999 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 3000 __ Ldr(temp, HeapOperand(array, class_offset)); 3001 codegen_->MaybeRecordImplicitNullCheck(instruction); 3002 } 3003 GetAssembler()->MaybeUnpoisonHeapReference(temp); 3004 3005 // /* HeapReference<Class> */ temp = temp->component_type_ 3006 __ Ldr(temp, HeapOperand(temp, component_offset)); 3007 // /* HeapReference<Class> */ temp2 = value->klass_ 3008 __ Ldr(temp2, HeapOperand(Register(value), class_offset)); 3009 // If heap poisoning is enabled, no need to unpoison `temp` 3010 // nor `temp2`, as we are comparing two poisoned references. 3011 __ Cmp(temp, temp2); 3012 temps.Release(temp2); 3013 3014 if (instruction->StaticTypeOfArrayIsObjectArray()) { 3015 vixl::aarch64::Label do_put; 3016 __ B(eq, &do_put); 3017 // If heap poisoning is enabled, the `temp` reference has 3018 // not been unpoisoned yet; unpoison it now. 3019 GetAssembler()->MaybeUnpoisonHeapReference(temp); 3020 3021 // /* HeapReference<Class> */ temp = temp->super_class_ 3022 __ Ldr(temp, HeapOperand(temp, super_offset)); 3023 // If heap poisoning is enabled, no need to unpoison 3024 // `temp`, as we are comparing against null below. 
3025 __ Cbnz(temp, slow_path->GetEntryLabel()); 3026 __ Bind(&do_put); 3027 } else { 3028 __ B(ne, slow_path->GetEntryLabel()); 3029 } 3030 } 3031 3032 if (kPoisonHeapReferences) { 3033 Register temp2 = temps.AcquireSameSizeAs(array); 3034 DCHECK(value.IsW()); 3035 __ Mov(temp2, value.W()); 3036 GetAssembler()->PoisonHeapReference(temp2); 3037 source = temp2; 3038 } 3039 3040 if (!index.IsConstant()) { 3041 __ Add(temp, array, offset); 3042 } else { 3043 // We no longer need the `temp` here so release it as the store below may 3044 // need a scratch register (if the constant index makes the offset too large) 3045 // and the poisoned `source` could be using the other scratch register. 3046 temps.Release(temp); 3047 } 3048 { 3049 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 3050 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 3051 __ Str(source, destination); 3052 3053 if (!may_need_runtime_call_for_type_check) { 3054 codegen_->MaybeRecordImplicitNullCheck(instruction); 3055 } 3056 } 3057 } 3058 3059 codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull()); 3060 3061 if (done.IsLinked()) { 3062 __ Bind(&done); 3063 } 3064 3065 if (slow_path != nullptr) { 3066 __ Bind(slow_path->GetExitLabel()); 3067 } 3068 } 3069 } 3070 3071 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { 3072 RegisterSet caller_saves = RegisterSet::Empty(); 3073 InvokeRuntimeCallingConvention calling_convention; 3074 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 3075 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode())); 3076 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); 3077 locations->SetInAt(0, Location::RequiresRegister()); 3078 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); 3079 } 3080 3081 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) { 3082 BoundsCheckSlowPathARM64* slow_path = 3083 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction); 3084 codegen_->AddSlowPath(slow_path); 3085 __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); 3086 __ B(slow_path->GetEntryLabel(), hs); 3087 } 3088 3089 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { 3090 LocationSummary* locations = 3091 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 3092 locations->SetInAt(0, Location::RequiresRegister()); 3093 if (check->HasUses()) { 3094 locations->SetOut(Location::SameAsFirstInput()); 3095 } 3096 } 3097 3098 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { 3099 // We assume the class is not null. 
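  // The inline check below only inspects the class status; if the class is not yet
  // initialized, the slow path calls into the runtime to initialize it (or to wait for
  // initialization if another thread is already performing it).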
3100   SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(
3101       check->GetLoadClass(), check, check->GetDexPc(), /* do_clinit */ true);
3102   codegen_->AddSlowPath(slow_path);
3103   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3104 }
3105 
3106 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3107   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3108       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3109 }
3110 
3111 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3112   FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3113   Location rhs_loc = instruction->GetLocations()->InAt(1);
3114   if (rhs_loc.IsConstant()) {
3115     // 0.0 is the only immediate that can be encoded directly in
3116     // an FCMP instruction.
3117     //
3118     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3119     // specify that in a floating-point comparison, positive zero
3120     // and negative zero are considered equal, so we can use the
3121     // literal 0.0 for both cases here.
3122     //
3123     // Note however that some methods (Float.equals, Float.compare,
3124     // Float.compareTo, Double.equals, Double.compare,
3125     // Double.compareTo, Math.max, Math.min, StrictMath.max,
3126     // StrictMath.min) consider 0.0 to be (strictly) greater than
3127     // -0.0. So if we ever translate calls to these methods into a
3128     // HCompare instruction, we must handle the -0.0 case with
3129     // care here.
3130     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3131     __ Fcmp(lhs_reg, 0.0);
3132   } else {
3133     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3134   }
3135 }
3136 
3137 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3138   LocationSummary* locations =
3139       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
3140   DataType::Type in_type = compare->InputAt(0)->GetType();
3141   switch (in_type) {
3142     case DataType::Type::kBool:
3143     case DataType::Type::kUint8:
3144     case DataType::Type::kInt8:
3145     case DataType::Type::kUint16:
3146     case DataType::Type::kInt16:
3147     case DataType::Type::kInt32:
3148     case DataType::Type::kInt64: {
3149       locations->SetInAt(0, Location::RequiresRegister());
3150       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
3151       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3152       break;
3153     }
3154     case DataType::Type::kFloat32:
3155     case DataType::Type::kFloat64: {
3156       locations->SetInAt(0, Location::RequiresFpuRegister());
3157       locations->SetInAt(1,
3158                          IsFloatingPointZeroConstant(compare->InputAt(1))
3159                              ?
Location::ConstantLocation(compare->InputAt(1)->AsConstant()) 3160 : Location::RequiresFpuRegister()); 3161 locations->SetOut(Location::RequiresRegister()); 3162 break; 3163 } 3164 default: 3165 LOG(FATAL) << "Unexpected type for compare operation " << in_type; 3166 } 3167 } 3168 3169 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { 3170 DataType::Type in_type = compare->InputAt(0)->GetType(); 3171 3172 // 0 if: left == right 3173 // 1 if: left > right 3174 // -1 if: left < right 3175 switch (in_type) { 3176 case DataType::Type::kBool: 3177 case DataType::Type::kUint8: 3178 case DataType::Type::kInt8: 3179 case DataType::Type::kUint16: 3180 case DataType::Type::kInt16: 3181 case DataType::Type::kInt32: 3182 case DataType::Type::kInt64: { 3183 Register result = OutputRegister(compare); 3184 Register left = InputRegisterAt(compare, 0); 3185 Operand right = InputOperandAt(compare, 1); 3186 __ Cmp(left, right); 3187 __ Cset(result, ne); // result == +1 if NE or 0 otherwise 3188 __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise 3189 break; 3190 } 3191 case DataType::Type::kFloat32: 3192 case DataType::Type::kFloat64: { 3193 Register result = OutputRegister(compare); 3194 GenerateFcmp(compare); 3195 __ Cset(result, ne); 3196 __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias())); 3197 break; 3198 } 3199 default: 3200 LOG(FATAL) << "Unimplemented compare type " << in_type; 3201 } 3202 } 3203 3204 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { 3205 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 3206 3207 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { 3208 locations->SetInAt(0, Location::RequiresFpuRegister()); 3209 locations->SetInAt(1, 3210 IsFloatingPointZeroConstant(instruction->InputAt(1)) 3211 ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant()) 3212 : Location::RequiresFpuRegister()); 3213 } else { 3214 // Integer cases. 3215 locations->SetInAt(0, Location::RequiresRegister()); 3216 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); 3217 } 3218 3219 if (!instruction->IsEmittedAtUseSite()) { 3220 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3221 } 3222 } 3223 3224 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { 3225 if (instruction->IsEmittedAtUseSite()) { 3226 return; 3227 } 3228 3229 LocationSummary* locations = instruction->GetLocations(); 3230 Register res = RegisterFrom(locations->Out(), instruction->GetType()); 3231 IfCondition if_cond = instruction->GetCondition(); 3232 3233 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { 3234 GenerateFcmp(instruction); 3235 __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias())); 3236 } else { 3237 // Integer cases. 
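    // Materializes the condition as 0/1; e.g. an HLessThan on 32-bit inputs typically
    // becomes: cmp w<lhs>, <rhs>; cset w<res>, lt.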
3238 Register lhs = InputRegisterAt(instruction, 0); 3239 Operand rhs = InputOperandAt(instruction, 1); 3240 __ Cmp(lhs, rhs); 3241 __ Cset(res, ARM64Condition(if_cond)); 3242 } 3243 } 3244 3245 #define FOR_EACH_CONDITION_INSTRUCTION(M) \ 3246 M(Equal) \ 3247 M(NotEqual) \ 3248 M(LessThan) \ 3249 M(LessThanOrEqual) \ 3250 M(GreaterThan) \ 3251 M(GreaterThanOrEqual) \ 3252 M(Below) \ 3253 M(BelowOrEqual) \ 3254 M(Above) \ 3255 M(AboveOrEqual) 3256 #define DEFINE_CONDITION_VISITORS(Name) \ 3257 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \ 3258 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } 3259 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) 3260 #undef DEFINE_CONDITION_VISITORS 3261 #undef FOR_EACH_CONDITION_INSTRUCTION 3262 3263 void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { 3264 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3265 3266 LocationSummary* locations = instruction->GetLocations(); 3267 Location second = locations->InAt(1); 3268 DCHECK(second.IsConstant()); 3269 3270 Register out = OutputRegister(instruction); 3271 Register dividend = InputRegisterAt(instruction, 0); 3272 int64_t imm = Int64FromConstant(second.GetConstant()); 3273 DCHECK(imm == 1 || imm == -1); 3274 3275 if (instruction->IsRem()) { 3276 __ Mov(out, 0); 3277 } else { 3278 if (imm == 1) { 3279 __ Mov(out, dividend); 3280 } else { 3281 __ Neg(out, dividend); 3282 } 3283 } 3284 } 3285 3286 void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { 3287 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3288 3289 LocationSummary* locations = instruction->GetLocations(); 3290 Location second = locations->InAt(1); 3291 DCHECK(second.IsConstant()); 3292 3293 Register out = OutputRegister(instruction); 3294 Register dividend = InputRegisterAt(instruction, 0); 3295 int64_t imm = Int64FromConstant(second.GetConstant()); 3296 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); 3297 int ctz_imm = CTZ(abs_imm); 3298 3299 UseScratchRegisterScope temps(GetVIXLAssembler()); 3300 Register temp = temps.AcquireSameSizeAs(out); 3301 3302 if (instruction->IsDiv()) { 3303 __ Add(temp, dividend, abs_imm - 1); 3304 __ Cmp(dividend, 0); 3305 __ Csel(out, temp, dividend, lt); 3306 if (imm > 0) { 3307 __ Asr(out, out, ctz_imm); 3308 } else { 3309 __ Neg(out, Operand(out, ASR, ctz_imm)); 3310 } 3311 } else { 3312 int bits = instruction->GetResultType() == DataType::Type::kInt32 ? 
32 : 64; 3313 __ Asr(temp, dividend, bits - 1); 3314 __ Lsr(temp, temp, bits - ctz_imm); 3315 __ Add(out, dividend, temp); 3316 __ And(out, out, abs_imm - 1); 3317 __ Sub(out, out, temp); 3318 } 3319 } 3320 3321 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { 3322 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3323 3324 LocationSummary* locations = instruction->GetLocations(); 3325 Location second = locations->InAt(1); 3326 DCHECK(second.IsConstant()); 3327 3328 Register out = OutputRegister(instruction); 3329 Register dividend = InputRegisterAt(instruction, 0); 3330 int64_t imm = Int64FromConstant(second.GetConstant()); 3331 3332 DataType::Type type = instruction->GetResultType(); 3333 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 3334 3335 int64_t magic; 3336 int shift; 3337 CalculateMagicAndShiftForDivRem( 3338 imm, type == DataType::Type::kInt64 /* is_long */, &magic, &shift); 3339 3340 UseScratchRegisterScope temps(GetVIXLAssembler()); 3341 Register temp = temps.AcquireSameSizeAs(out); 3342 3343 // temp = get_high(dividend * magic) 3344 __ Mov(temp, magic); 3345 if (type == DataType::Type::kInt64) { 3346 __ Smulh(temp, dividend, temp); 3347 } else { 3348 __ Smull(temp.X(), dividend, temp); 3349 __ Lsr(temp.X(), temp.X(), 32); 3350 } 3351 3352 if (imm > 0 && magic < 0) { 3353 __ Add(temp, temp, dividend); 3354 } else if (imm < 0 && magic > 0) { 3355 __ Sub(temp, temp, dividend); 3356 } 3357 3358 if (shift != 0) { 3359 __ Asr(temp, temp, shift); 3360 } 3361 3362 if (instruction->IsDiv()) { 3363 __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31)); 3364 } else { 3365 __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31)); 3366 // TODO: Strength reduction for msub. 3367 Register temp_imm = temps.AcquireSameSizeAs(out); 3368 __ Mov(temp_imm, imm); 3369 __ Msub(out, temp, temp_imm, dividend); 3370 } 3371 } 3372 3373 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { 3374 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3375 DataType::Type type = instruction->GetResultType(); 3376 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 3377 3378 LocationSummary* locations = instruction->GetLocations(); 3379 Register out = OutputRegister(instruction); 3380 Location second = locations->InAt(1); 3381 3382 if (second.IsConstant()) { 3383 int64_t imm = Int64FromConstant(second.GetConstant()); 3384 3385 if (imm == 0) { 3386 // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
3387 } else if (imm == 1 || imm == -1) { 3388 DivRemOneOrMinusOne(instruction); 3389 } else if (IsPowerOfTwo(AbsOrMin(imm))) { 3390 DivRemByPowerOfTwo(instruction); 3391 } else { 3392 DCHECK(imm <= -2 || imm >= 2); 3393 GenerateDivRemWithAnyConstant(instruction); 3394 } 3395 } else { 3396 Register dividend = InputRegisterAt(instruction, 0); 3397 Register divisor = InputRegisterAt(instruction, 1); 3398 if (instruction->IsDiv()) { 3399 __ Sdiv(out, dividend, divisor); 3400 } else { 3401 UseScratchRegisterScope temps(GetVIXLAssembler()); 3402 Register temp = temps.AcquireSameSizeAs(out); 3403 __ Sdiv(temp, dividend, divisor); 3404 __ Msub(out, temp, divisor, dividend); 3405 } 3406 } 3407 } 3408 3409 void LocationsBuilderARM64::VisitDiv(HDiv* div) { 3410 LocationSummary* locations = 3411 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall); 3412 switch (div->GetResultType()) { 3413 case DataType::Type::kInt32: 3414 case DataType::Type::kInt64: 3415 locations->SetInAt(0, Location::RequiresRegister()); 3416 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); 3417 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3418 break; 3419 3420 case DataType::Type::kFloat32: 3421 case DataType::Type::kFloat64: 3422 locations->SetInAt(0, Location::RequiresFpuRegister()); 3423 locations->SetInAt(1, Location::RequiresFpuRegister()); 3424 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3425 break; 3426 3427 default: 3428 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3429 } 3430 } 3431 3432 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { 3433 DataType::Type type = div->GetResultType(); 3434 switch (type) { 3435 case DataType::Type::kInt32: 3436 case DataType::Type::kInt64: 3437 GenerateDivRemIntegral(div); 3438 break; 3439 3440 case DataType::Type::kFloat32: 3441 case DataType::Type::kFloat64: 3442 __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1)); 3443 break; 3444 3445 default: 3446 LOG(FATAL) << "Unexpected div type " << type; 3447 } 3448 } 3449 3450 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 3451 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 3452 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); 3453 } 3454 3455 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 3456 SlowPathCodeARM64* slow_path = 3457 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction); 3458 codegen_->AddSlowPath(slow_path); 3459 Location value = instruction->GetLocations()->InAt(0); 3460 3461 DataType::Type type = instruction->GetType(); 3462 3463 if (!DataType::IsIntegralType(type)) { 3464 LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; 3465 return; 3466 } 3467 3468 if (value.IsConstant()) { 3469 int64_t divisor = Int64ConstantFrom(value); 3470 if (divisor == 0) { 3471 __ B(slow_path->GetEntryLabel()); 3472 } else { 3473 // A division by a non-null constant is valid. We don't need to perform 3474 // any check, so simply fall through. 
3475 } 3476 } else { 3477 __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); 3478 } 3479 } 3480 3481 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) { 3482 LocationSummary* locations = 3483 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3484 locations->SetOut(Location::ConstantLocation(constant)); 3485 } 3486 3487 void InstructionCodeGeneratorARM64::VisitDoubleConstant( 3488 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 3489 // Will be generated at use site. 3490 } 3491 3492 void LocationsBuilderARM64::VisitExit(HExit* exit) { 3493 exit->SetLocations(nullptr); 3494 } 3495 3496 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 3497 } 3498 3499 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { 3500 LocationSummary* locations = 3501 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3502 locations->SetOut(Location::ConstantLocation(constant)); 3503 } 3504 3505 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 3506 // Will be generated at use site. 3507 } 3508 3509 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) { 3510 if (successor->IsExitBlock()) { 3511 DCHECK(got->GetPrevious()->AlwaysThrows()); 3512 return; // no code needed 3513 } 3514 3515 HBasicBlock* block = got->GetBlock(); 3516 HInstruction* previous = got->GetPrevious(); 3517 HLoopInformation* info = block->GetLoopInformation(); 3518 3519 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 3520 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { 3521 UseScratchRegisterScope temps(GetVIXLAssembler()); 3522 Register temp1 = temps.AcquireX(); 3523 Register temp2 = temps.AcquireX(); 3524 __ Ldr(temp1, MemOperand(sp, 0)); 3525 __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); 3526 __ Add(temp2, temp2, 1); 3527 __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); 3528 } 3529 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 3530 return; 3531 } 3532 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 3533 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 3534 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 3535 } 3536 if (!codegen_->GoesToNextBlock(block, successor)) { 3537 __ B(codegen_->GetLabelOf(successor)); 3538 } 3539 } 3540 3541 void LocationsBuilderARM64::VisitGoto(HGoto* got) { 3542 got->SetLocations(nullptr); 3543 } 3544 3545 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { 3546 HandleGoto(got, got->GetSuccessor()); 3547 } 3548 3549 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) { 3550 try_boundary->SetLocations(nullptr); 3551 } 3552 3553 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) { 3554 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); 3555 if (!successor->IsExitBlock()) { 3556 HandleGoto(try_boundary, successor); 3557 } 3558 } 3559 3560 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, 3561 size_t condition_input_index, 3562 vixl::aarch64::Label* true_target, 3563 vixl::aarch64::Label* false_target) { 3564 HInstruction* cond = instruction->InputAt(condition_input_index); 3565 3566 if (true_target == nullptr && false_target == nullptr) { 3567 // Nothing to do. 
The code always falls through. 3568 return; 3569 } else if (cond->IsIntConstant()) { 3570 // Constant condition, statically compared against "true" (integer value 1). 3571 if (cond->AsIntConstant()->IsTrue()) { 3572 if (true_target != nullptr) { 3573 __ B(true_target); 3574 } 3575 } else { 3576 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 3577 if (false_target != nullptr) { 3578 __ B(false_target); 3579 } 3580 } 3581 return; 3582 } 3583 3584 // The following code generates these patterns: 3585 // (1) true_target == nullptr && false_target != nullptr 3586 // - opposite condition true => branch to false_target 3587 // (2) true_target != nullptr && false_target == nullptr 3588 // - condition true => branch to true_target 3589 // (3) true_target != nullptr && false_target != nullptr 3590 // - condition true => branch to true_target 3591 // - branch to false_target 3592 if (IsBooleanValueOrMaterializedCondition(cond)) { 3593 // The condition instruction has been materialized, compare the output to 0. 3594 Location cond_val = instruction->GetLocations()->InAt(condition_input_index); 3595 DCHECK(cond_val.IsRegister()); 3596 if (true_target == nullptr) { 3597 __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); 3598 } else { 3599 __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); 3600 } 3601 } else { 3602 // The condition instruction has not been materialized, use its inputs as 3603 // the comparison and its condition as the branch condition. 3604 HCondition* condition = cond->AsCondition(); 3605 3606 DataType::Type type = condition->InputAt(0)->GetType(); 3607 if (DataType::IsFloatingPointType(type)) { 3608 GenerateFcmp(condition); 3609 if (true_target == nullptr) { 3610 IfCondition opposite_condition = condition->GetOppositeCondition(); 3611 __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target); 3612 } else { 3613 __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target); 3614 } 3615 } else { 3616 // Integer cases. 3617 Register lhs = InputRegisterAt(condition, 0); 3618 Operand rhs = InputOperandAt(condition, 1); 3619 3620 Condition arm64_cond; 3621 vixl::aarch64::Label* non_fallthrough_target; 3622 if (true_target == nullptr) { 3623 arm64_cond = ARM64Condition(condition->GetOppositeCondition()); 3624 non_fallthrough_target = false_target; 3625 } else { 3626 arm64_cond = ARM64Condition(condition->GetCondition()); 3627 non_fallthrough_target = true_target; 3628 } 3629 3630 if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) && 3631 rhs.IsImmediate() && (rhs.GetImmediate() == 0)) { 3632 switch (arm64_cond) { 3633 case eq: 3634 __ Cbz(lhs, non_fallthrough_target); 3635 break; 3636 case ne: 3637 __ Cbnz(lhs, non_fallthrough_target); 3638 break; 3639 case lt: 3640 // Test the sign bit and branch accordingly. 3641 __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); 3642 break; 3643 case ge: 3644 // Test the sign bit and branch accordingly. 3645 __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); 3646 break; 3647 default: 3648 // Without the `static_cast` the compiler throws an error for 3649 // `-Werror=sign-promo`. 
3650 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); 3651 } 3652 } else { 3653 __ Cmp(lhs, rhs); 3654 __ B(arm64_cond, non_fallthrough_target); 3655 } 3656 } 3657 } 3658 3659 // If neither branch falls through (case 3), the conditional branch to `true_target` 3660 // was already emitted (case 2) and we need to emit a jump to `false_target`. 3661 if (true_target != nullptr && false_target != nullptr) { 3662 __ B(false_target); 3663 } 3664 } 3665 3666 void LocationsBuilderARM64::VisitIf(HIf* if_instr) { 3667 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); 3668 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 3669 locations->SetInAt(0, Location::RequiresRegister()); 3670 } 3671 } 3672 3673 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { 3674 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 3675 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 3676 vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor); 3677 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) { 3678 true_target = nullptr; 3679 } 3680 vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor); 3681 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) { 3682 false_target = nullptr; 3683 } 3684 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); 3685 } 3686 3687 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 3688 LocationSummary* locations = new (GetGraph()->GetAllocator()) 3689 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 3690 InvokeRuntimeCallingConvention calling_convention; 3691 RegisterSet caller_saves = RegisterSet::Empty(); 3692 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 3693 locations->SetCustomSlowPathCallerSaves(caller_saves); 3694 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 3695 locations->SetInAt(0, Location::RequiresRegister()); 3696 } 3697 } 3698 3699 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 3700 SlowPathCodeARM64* slow_path = 3701 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); 3702 GenerateTestAndBranch(deoptimize, 3703 /* condition_input_index */ 0, 3704 slow_path->GetEntryLabel(), 3705 /* false_target */ nullptr); 3706 } 3707 3708 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3709 LocationSummary* locations = new (GetGraph()->GetAllocator()) 3710 LocationSummary(flag, LocationSummary::kNoCall); 3711 locations->SetOut(Location::RequiresRegister()); 3712 } 3713 3714 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3715 __ Ldr(OutputRegister(flag), 3716 MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 3717 } 3718 3719 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { 3720 return condition->IsCondition() && 3721 DataType::IsFloatingPointType(condition->InputAt(0)->GetType()); 3722 } 3723 3724 static inline Condition GetConditionForSelect(HCondition* condition) { 3725 IfCondition cond = condition->AsCondition()->GetCondition(); 3726 return IsConditionOnFloatingPointValues(condition) ? 
ARM64FPCondition(cond, condition->IsGtBias()) 3727 : ARM64Condition(cond); 3728 } 3729 3730 void LocationsBuilderARM64::VisitSelect(HSelect* select) { 3731 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); 3732 if (DataType::IsFloatingPointType(select->GetType())) { 3733 locations->SetInAt(0, Location::RequiresFpuRegister()); 3734 locations->SetInAt(1, Location::RequiresFpuRegister()); 3735 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3736 } else { 3737 HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); 3738 HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); 3739 bool is_true_value_constant = cst_true_value != nullptr; 3740 bool is_false_value_constant = cst_false_value != nullptr; 3741 // Ask VIXL whether we should synthesize constants in registers. 3742 // We give an arbitrary register to VIXL when dealing with non-constant inputs. 3743 Operand true_op = is_true_value_constant ? 3744 Operand(Int64FromConstant(cst_true_value)) : Operand(x1); 3745 Operand false_op = is_false_value_constant ? 3746 Operand(Int64FromConstant(cst_false_value)) : Operand(x2); 3747 bool true_value_in_register = false; 3748 bool false_value_in_register = false; 3749 MacroAssembler::GetCselSynthesisInformation( 3750 x0, true_op, false_op, &true_value_in_register, &false_value_in_register); 3751 true_value_in_register |= !is_true_value_constant; 3752 false_value_in_register |= !is_false_value_constant; 3753 3754 locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister() 3755 : Location::ConstantLocation(cst_true_value)); 3756 locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister() 3757 : Location::ConstantLocation(cst_false_value)); 3758 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3759 } 3760 3761 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 3762 locations->SetInAt(2, Location::RequiresRegister()); 3763 } 3764 } 3765 3766 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { 3767 HInstruction* cond = select->GetCondition(); 3768 Condition csel_cond; 3769 3770 if (IsBooleanValueOrMaterializedCondition(cond)) { 3771 if (cond->IsCondition() && cond->GetNext() == select) { 3772 // Use the condition flags set by the previous instruction. 3773 csel_cond = GetConditionForSelect(cond->AsCondition()); 3774 } else { 3775 __ Cmp(InputRegisterAt(select, 2), 0); 3776 csel_cond = ne; 3777 } 3778 } else if (IsConditionOnFloatingPointValues(cond)) { 3779 GenerateFcmp(cond); 3780 csel_cond = GetConditionForSelect(cond->AsCondition()); 3781 } else { 3782 __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1)); 3783 csel_cond = GetConditionForSelect(cond->AsCondition()); 3784 } 3785 3786 if (DataType::IsFloatingPointType(select->GetType())) { 3787 __ Fcsel(OutputFPRegister(select), 3788 InputFPRegisterAt(select, 1), 3789 InputFPRegisterAt(select, 0), 3790 csel_cond); 3791 } else { 3792 __ Csel(OutputRegister(select), 3793 InputOperandAt(select, 1), 3794 InputOperandAt(select, 0), 3795 csel_cond); 3796 } 3797 } 3798 3799 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 3800 new (GetGraph()->GetAllocator()) LocationSummary(info); 3801 } 3802 3803 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) { 3804 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 
3805 } 3806 3807 void CodeGeneratorARM64::GenerateNop() { 3808 __ Nop(); 3809 } 3810 3811 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3812 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3813 } 3814 3815 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3816 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3817 } 3818 3819 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3820 HandleFieldSet(instruction); 3821 } 3822 3823 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3824 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 3825 } 3826 3827 // Temp is used for read barrier. 3828 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { 3829 if (kEmitCompilerReadBarrier && 3830 (kUseBakerReadBarrier || 3831 type_check_kind == TypeCheckKind::kAbstractClassCheck || 3832 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 3833 type_check_kind == TypeCheckKind::kArrayObjectCheck)) { 3834 return 1; 3835 } 3836 return 0; 3837 } 3838 3839 // Interface case has 3 temps, one for holding the number of interfaces, one for the current 3840 // interface pointer, one for loading the current interface. 3841 // The other checks have one temp for loading the object's class. 3842 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { 3843 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 3844 return 3; 3845 } 3846 return 1 + NumberOfInstanceOfTemps(type_check_kind); 3847 } 3848 3849 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { 3850 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 3851 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3852 bool baker_read_barrier_slow_path = false; 3853 switch (type_check_kind) { 3854 case TypeCheckKind::kExactCheck: 3855 case TypeCheckKind::kAbstractClassCheck: 3856 case TypeCheckKind::kClassHierarchyCheck: 3857 case TypeCheckKind::kArrayObjectCheck: { 3858 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); 3859 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 3860 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; 3861 break; 3862 } 3863 case TypeCheckKind::kArrayCheck: 3864 case TypeCheckKind::kUnresolvedCheck: 3865 case TypeCheckKind::kInterfaceCheck: 3866 call_kind = LocationSummary::kCallOnSlowPath; 3867 break; 3868 } 3869 3870 LocationSummary* locations = 3871 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 3872 if (baker_read_barrier_slow_path) { 3873 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 3874 } 3875 locations->SetInAt(0, Location::RequiresRegister()); 3876 locations->SetInAt(1, Location::RequiresRegister()); 3877 // The "out" register is used as a temporary, so it overlaps with the inputs. 3878 // Note that TypeCheckSlowPathARM64 uses this register too. 3879 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 3880 // Add temps if necessary for read barriers. 
3881 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); 3882 } 3883 3884 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { 3885 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3886 LocationSummary* locations = instruction->GetLocations(); 3887 Location obj_loc = locations->InAt(0); 3888 Register obj = InputRegisterAt(instruction, 0); 3889 Register cls = InputRegisterAt(instruction, 1); 3890 Location out_loc = locations->Out(); 3891 Register out = OutputRegister(instruction); 3892 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); 3893 DCHECK_LE(num_temps, 1u); 3894 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); 3895 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3896 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3897 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3898 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 3899 3900 vixl::aarch64::Label done, zero; 3901 SlowPathCodeARM64* slow_path = nullptr; 3902 3903 // Return 0 if `obj` is null. 3904 // Avoid null check if we know `obj` is not null. 3905 if (instruction->MustDoNullCheck()) { 3906 __ Cbz(obj, &zero); 3907 } 3908 3909 switch (type_check_kind) { 3910 case TypeCheckKind::kExactCheck: { 3911 ReadBarrierOption read_barrier_option = 3912 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3913 // /* HeapReference<Class> */ out = obj->klass_ 3914 GenerateReferenceLoadTwoRegisters(instruction, 3915 out_loc, 3916 obj_loc, 3917 class_offset, 3918 maybe_temp_loc, 3919 read_barrier_option); 3920 __ Cmp(out, cls); 3921 __ Cset(out, eq); 3922 if (zero.IsLinked()) { 3923 __ B(&done); 3924 } 3925 break; 3926 } 3927 3928 case TypeCheckKind::kAbstractClassCheck: { 3929 ReadBarrierOption read_barrier_option = 3930 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3931 // /* HeapReference<Class> */ out = obj->klass_ 3932 GenerateReferenceLoadTwoRegisters(instruction, 3933 out_loc, 3934 obj_loc, 3935 class_offset, 3936 maybe_temp_loc, 3937 read_barrier_option); 3938 // If the class is abstract, we eagerly fetch the super class of the 3939 // object to avoid doing a comparison we know will fail. 3940 vixl::aarch64::Label loop, success; 3941 __ Bind(&loop); 3942 // /* HeapReference<Class> */ out = out->super_class_ 3943 GenerateReferenceLoadOneRegister(instruction, 3944 out_loc, 3945 super_offset, 3946 maybe_temp_loc, 3947 read_barrier_option); 3948 // If `out` is null, we use it for the result, and jump to `done`. 3949 __ Cbz(out, &done); 3950 __ Cmp(out, cls); 3951 __ B(ne, &loop); 3952 __ Mov(out, 1); 3953 if (zero.IsLinked()) { 3954 __ B(&done); 3955 } 3956 break; 3957 } 3958 3959 case TypeCheckKind::kClassHierarchyCheck: { 3960 ReadBarrierOption read_barrier_option = 3961 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3962 // /* HeapReference<Class> */ out = obj->klass_ 3963 GenerateReferenceLoadTwoRegisters(instruction, 3964 out_loc, 3965 obj_loc, 3966 class_offset, 3967 maybe_temp_loc, 3968 read_barrier_option); 3969 // Walk over the class hierarchy to find a match. 
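      // The loop below compares the current class against `cls`; on mismatch it loads the
      // super class and repeats. Reaching a null super class means no match (the null `out`
      // then serves as the 0 result), while a match sets the result to 1.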
3970 vixl::aarch64::Label loop, success; 3971 __ Bind(&loop); 3972 __ Cmp(out, cls); 3973 __ B(eq, &success); 3974 // /* HeapReference<Class> */ out = out->super_class_ 3975 GenerateReferenceLoadOneRegister(instruction, 3976 out_loc, 3977 super_offset, 3978 maybe_temp_loc, 3979 read_barrier_option); 3980 __ Cbnz(out, &loop); 3981 // If `out` is null, we use it for the result, and jump to `done`. 3982 __ B(&done); 3983 __ Bind(&success); 3984 __ Mov(out, 1); 3985 if (zero.IsLinked()) { 3986 __ B(&done); 3987 } 3988 break; 3989 } 3990 3991 case TypeCheckKind::kArrayObjectCheck: { 3992 ReadBarrierOption read_barrier_option = 3993 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3994 // /* HeapReference<Class> */ out = obj->klass_ 3995 GenerateReferenceLoadTwoRegisters(instruction, 3996 out_loc, 3997 obj_loc, 3998 class_offset, 3999 maybe_temp_loc, 4000 read_barrier_option); 4001 // Do an exact check. 4002 vixl::aarch64::Label exact_check; 4003 __ Cmp(out, cls); 4004 __ B(eq, &exact_check); 4005 // Otherwise, we need to check that the object's class is a non-primitive array. 4006 // /* HeapReference<Class> */ out = out->component_type_ 4007 GenerateReferenceLoadOneRegister(instruction, 4008 out_loc, 4009 component_offset, 4010 maybe_temp_loc, 4011 read_barrier_option); 4012 // If `out` is null, we use it for the result, and jump to `done`. 4013 __ Cbz(out, &done); 4014 __ Ldrh(out, HeapOperand(out, primitive_offset)); 4015 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 4016 __ Cbnz(out, &zero); 4017 __ Bind(&exact_check); 4018 __ Mov(out, 1); 4019 __ B(&done); 4020 break; 4021 } 4022 4023 case TypeCheckKind::kArrayCheck: { 4024 // No read barrier since the slow path will retry upon failure. 4025 // /* HeapReference<Class> */ out = obj->klass_ 4026 GenerateReferenceLoadTwoRegisters(instruction, 4027 out_loc, 4028 obj_loc, 4029 class_offset, 4030 maybe_temp_loc, 4031 kWithoutReadBarrier); 4032 __ Cmp(out, cls); 4033 DCHECK(locations->OnlyCallsOnSlowPath()); 4034 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 4035 instruction, /* is_fatal */ false); 4036 codegen_->AddSlowPath(slow_path); 4037 __ B(ne, slow_path->GetEntryLabel()); 4038 __ Mov(out, 1); 4039 if (zero.IsLinked()) { 4040 __ B(&done); 4041 } 4042 break; 4043 } 4044 4045 case TypeCheckKind::kUnresolvedCheck: 4046 case TypeCheckKind::kInterfaceCheck: { 4047 // Note that we indeed only call on slow path, but we always go 4048 // into the slow path for the unresolved and interface check 4049 // cases. 4050 // 4051 // We cannot directly call the InstanceofNonTrivial runtime 4052 // entry point without resorting to a type checking slow path 4053 // here (i.e. by calling InvokeRuntime directly), as it would 4054 // require to assign fixed registers for the inputs of this 4055 // HInstanceOf instruction (following the runtime calling 4056 // convention), which might be cluttered by the potential first 4057 // read barrier emission at the beginning of this method. 4058 // 4059 // TODO: Introduce a new runtime entry point taking the object 4060 // to test (instead of its class) as argument, and let it deal 4061 // with the read barrier issues. This will let us refactor this 4062 // case of the `switch` code as it was previously (with a direct 4063 // call to the runtime not using a type checking slow path). 4064 // This should also be beneficial for the other cases above. 
4065 DCHECK(locations->OnlyCallsOnSlowPath()); 4066 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 4067 instruction, /* is_fatal */ false); 4068 codegen_->AddSlowPath(slow_path); 4069 __ B(slow_path->GetEntryLabel()); 4070 if (zero.IsLinked()) { 4071 __ B(&done); 4072 } 4073 break; 4074 } 4075 } 4076 4077 if (zero.IsLinked()) { 4078 __ Bind(&zero); 4079 __ Mov(out, 0); 4080 } 4081 4082 if (done.IsLinked()) { 4083 __ Bind(&done); 4084 } 4085 4086 if (slow_path != nullptr) { 4087 __ Bind(slow_path->GetExitLabel()); 4088 } 4089 } 4090 4091 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { 4092 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 4093 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); 4094 LocationSummary* locations = 4095 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 4096 locations->SetInAt(0, Location::RequiresRegister()); 4097 locations->SetInAt(1, Location::RequiresRegister()); 4098 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 4099 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); 4100 } 4101 4102 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { 4103 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 4104 LocationSummary* locations = instruction->GetLocations(); 4105 Location obj_loc = locations->InAt(0); 4106 Register obj = InputRegisterAt(instruction, 0); 4107 Register cls = InputRegisterAt(instruction, 1); 4108 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); 4109 DCHECK_GE(num_temps, 1u); 4110 DCHECK_LE(num_temps, 3u); 4111 Location temp_loc = locations->GetTemp(0); 4112 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); 4113 Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation(); 4114 Register temp = WRegisterFrom(temp_loc); 4115 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 4116 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 4117 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 4118 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 4119 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 4120 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 4121 const uint32_t object_array_data_offset = 4122 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 4123 4124 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); 4125 SlowPathCodeARM64* type_check_slow_path = 4126 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 4127 instruction, is_type_check_slow_path_fatal); 4128 codegen_->AddSlowPath(type_check_slow_path); 4129 4130 vixl::aarch64::Label done; 4131 // Avoid null check if we know obj is not null. 4132 if (instruction->MustDoNullCheck()) { 4133 __ Cbz(obj, &done); 4134 } 4135 4136 switch (type_check_kind) { 4137 case TypeCheckKind::kExactCheck: 4138 case TypeCheckKind::kArrayCheck: { 4139 // /* HeapReference<Class> */ temp = obj->klass_ 4140 GenerateReferenceLoadTwoRegisters(instruction, 4141 temp_loc, 4142 obj_loc, 4143 class_offset, 4144 maybe_temp2_loc, 4145 kWithoutReadBarrier); 4146 4147 __ Cmp(temp, cls); 4148 // Jump to slow path for throwing the exception or doing a 4149 // more involved array check. 
4150 __ B(ne, type_check_slow_path->GetEntryLabel()); 4151 break; 4152 } 4153 4154 case TypeCheckKind::kAbstractClassCheck: { 4155 // /* HeapReference<Class> */ temp = obj->klass_ 4156 GenerateReferenceLoadTwoRegisters(instruction, 4157 temp_loc, 4158 obj_loc, 4159 class_offset, 4160 maybe_temp2_loc, 4161 kWithoutReadBarrier); 4162 4163 // If the class is abstract, we eagerly fetch the super class of the 4164 // object to avoid doing a comparison we know will fail. 4165 vixl::aarch64::Label loop; 4166 __ Bind(&loop); 4167 // /* HeapReference<Class> */ temp = temp->super_class_ 4168 GenerateReferenceLoadOneRegister(instruction, 4169 temp_loc, 4170 super_offset, 4171 maybe_temp2_loc, 4172 kWithoutReadBarrier); 4173 4174 // If the class reference currently in `temp` is null, jump to the slow path to throw the 4175 // exception. 4176 __ Cbz(temp, type_check_slow_path->GetEntryLabel()); 4177 // Otherwise, compare classes. 4178 __ Cmp(temp, cls); 4179 __ B(ne, &loop); 4180 break; 4181 } 4182 4183 case TypeCheckKind::kClassHierarchyCheck: { 4184 // /* HeapReference<Class> */ temp = obj->klass_ 4185 GenerateReferenceLoadTwoRegisters(instruction, 4186 temp_loc, 4187 obj_loc, 4188 class_offset, 4189 maybe_temp2_loc, 4190 kWithoutReadBarrier); 4191 4192 // Walk over the class hierarchy to find a match. 4193 vixl::aarch64::Label loop; 4194 __ Bind(&loop); 4195 __ Cmp(temp, cls); 4196 __ B(eq, &done); 4197 4198 // /* HeapReference<Class> */ temp = temp->super_class_ 4199 GenerateReferenceLoadOneRegister(instruction, 4200 temp_loc, 4201 super_offset, 4202 maybe_temp2_loc, 4203 kWithoutReadBarrier); 4204 4205 // If the class reference currently in `temp` is not null, jump 4206 // back at the beginning of the loop. 4207 __ Cbnz(temp, &loop); 4208 // Otherwise, jump to the slow path to throw the exception. 4209 __ B(type_check_slow_path->GetEntryLabel()); 4210 break; 4211 } 4212 4213 case TypeCheckKind::kArrayObjectCheck: { 4214 // /* HeapReference<Class> */ temp = obj->klass_ 4215 GenerateReferenceLoadTwoRegisters(instruction, 4216 temp_loc, 4217 obj_loc, 4218 class_offset, 4219 maybe_temp2_loc, 4220 kWithoutReadBarrier); 4221 4222 // Do an exact check. 4223 __ Cmp(temp, cls); 4224 __ B(eq, &done); 4225 4226 // Otherwise, we need to check that the object's class is a non-primitive array. 4227 // /* HeapReference<Class> */ temp = temp->component_type_ 4228 GenerateReferenceLoadOneRegister(instruction, 4229 temp_loc, 4230 component_offset, 4231 maybe_temp2_loc, 4232 kWithoutReadBarrier); 4233 4234 // If the component type is null, jump to the slow path to throw the exception. 4235 __ Cbz(temp, type_check_slow_path->GetEntryLabel()); 4236 // Otherwise, the object is indeed an array. Further check that this component type is not a 4237 // primitive type. 4238 __ Ldrh(temp, HeapOperand(temp, primitive_offset)); 4239 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 4240 __ Cbnz(temp, type_check_slow_path->GetEntryLabel()); 4241 break; 4242 } 4243 4244 case TypeCheckKind::kUnresolvedCheck: 4245 // We always go into the type check slow path for the unresolved check cases. 4246 // 4247 // We cannot directly call the CheckCast runtime entry point 4248 // without resorting to a type checking slow path here (i.e. 
by
4249       // calling InvokeRuntime directly), as it would require us to
4250       // assign fixed registers for the inputs of this HCheckCast
4251       // instruction (following the runtime calling convention), which
4252       // might be cluttered by the potential first read barrier
4253       // emission at the beginning of this method.
4254       __ B(type_check_slow_path->GetEntryLabel());
4255       break;
4256     case TypeCheckKind::kInterfaceCheck: {
4257       // /* HeapReference<Class> */ temp = obj->klass_
4258       GenerateReferenceLoadTwoRegisters(instruction,
4259                                         temp_loc,
4260                                         obj_loc,
4261                                         class_offset,
4262                                         maybe_temp2_loc,
4263                                         kWithoutReadBarrier);
4264 
4265       // /* HeapReference<Class> */ temp = temp->iftable_
4266       GenerateReferenceLoadTwoRegisters(instruction,
4267                                         temp_loc,
4268                                         temp_loc,
4269                                         iftable_offset,
4270                                         maybe_temp2_loc,
4271                                         kWithoutReadBarrier);
4272       // Iftable is never null.
4273       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4274       // Loop through the iftable and check if any class matches.
4275       vixl::aarch64::Label start_loop;
4276       __ Bind(&start_loop);
4277       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4278       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4279       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4280       // Go to next interface.
4281       __ Add(temp, temp, 2 * kHeapReferenceSize);
4282       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4283       // Compare the classes and continue the loop if they do not match.
4284       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4285       __ B(ne, &start_loop);
4286       break;
4287     }
4288   }
4289   __ Bind(&done);
4290 
4291   __ Bind(type_check_slow_path->GetExitLabel());
4292 }
4293 
4294 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4295   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4296   locations->SetOut(Location::ConstantLocation(constant));
4297 }
4298 
4299 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4300   // Will be generated at use site.
4301 }
4302 
4303 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4304   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4305   locations->SetOut(Location::ConstantLocation(constant));
4306 }
4307 
4308 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4309   // Will be generated at use site.
4310 }
4311 
4312 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4313   // The trampoline uses the same calling convention as the dex calling convention,
4314   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4315   // the method_idx.
4316 HandleInvoke(invoke); 4317 } 4318 4319 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 4320 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 4321 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4322 } 4323 4324 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { 4325 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; 4326 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 4327 } 4328 4329 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { 4330 HandleInvoke(invoke); 4331 } 4332 4333 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { 4334 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 4335 LocationSummary* locations = invoke->GetLocations(); 4336 Register temp = XRegisterFrom(locations->GetTemp(0)); 4337 Location receiver = locations->InAt(0); 4338 Offset class_offset = mirror::Object::ClassOffset(); 4339 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); 4340 4341 // The register ip1 is required to be used for the hidden argument in 4342 // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. 4343 MacroAssembler* masm = GetVIXLAssembler(); 4344 UseScratchRegisterScope scratch_scope(masm); 4345 scratch_scope.Exclude(ip1); 4346 __ Mov(ip1, invoke->GetDexMethodIndex()); 4347 4348 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 4349 if (receiver.IsStackSlot()) { 4350 __ Ldr(temp.W(), StackOperandFrom(receiver)); 4351 { 4352 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 4353 // /* HeapReference<Class> */ temp = temp->klass_ 4354 __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); 4355 codegen_->MaybeRecordImplicitNullCheck(invoke); 4356 } 4357 } else { 4358 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 4359 // /* HeapReference<Class> */ temp = receiver->klass_ 4360 __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); 4361 codegen_->MaybeRecordImplicitNullCheck(invoke); 4362 } 4363 4364 // Instead of simply (possibly) unpoisoning `temp` here, we should 4365 // emit a read barrier for the previous class reference load. 4366 // However this is not required in practice, as this is an 4367 // intermediate/temporary reference and because the current 4368 // concurrent copying collector keeps the from-space memory 4369 // intact/accessible until the end of the marking phase (the 4370 // concurrent copying collector may not in the future). 4371 GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); 4372 __ Ldr(temp, 4373 MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); 4374 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 4375 invoke->GetImtIndex(), kArm64PointerSize)); 4376 // temp = temp->GetImtEntryAt(method_offset); 4377 __ Ldr(temp, MemOperand(temp, method_offset)); 4378 // lr = temp->GetEntryPoint(); 4379 __ Ldr(lr, MemOperand(temp, entry_point.Int32Value())); 4380 4381 { 4382 // Ensure the pc position is recorded immediately after the `blr` instruction. 
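    // The stack map emitted by RecordPcInfo() is keyed on the native PC immediately after
    // the BLR (the return address seen when walking the stack), so the ExactAssemblyScope
    // below keeps VIXL from emitting a literal pool or veneer between the call and the
    // recorded position.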
4383 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 4384 4385 // lr(); 4386 __ blr(lr); 4387 DCHECK(!codegen_->IsLeafMethod()); 4388 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 4389 } 4390 4391 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4392 } 4393 4394 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 4395 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_); 4396 if (intrinsic.TryDispatch(invoke)) { 4397 return; 4398 } 4399 4400 HandleInvoke(invoke); 4401 } 4402 4403 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 4404 // Explicit clinit checks triggered by static invokes must have been pruned by 4405 // art::PrepareForRegisterAllocation. 4406 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 4407 4408 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_); 4409 if (intrinsic.TryDispatch(invoke)) { 4410 return; 4411 } 4412 4413 HandleInvoke(invoke); 4414 } 4415 4416 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) { 4417 if (invoke->GetLocations()->Intrinsified()) { 4418 IntrinsicCodeGeneratorARM64 intrinsic(codegen); 4419 intrinsic.Dispatch(invoke); 4420 return true; 4421 } 4422 return false; 4423 } 4424 4425 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( 4426 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 4427 HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { 4428 // On ARM64 we support all dispatch types. 4429 return desired_dispatch_info; 4430 } 4431 4432 void CodeGeneratorARM64::GenerateStaticOrDirectCall( 4433 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { 4434 // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. 4435 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 4436 switch (invoke->GetMethodLoadKind()) { 4437 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { 4438 uint32_t offset = 4439 GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); 4440 // temp = thread->string_init_entrypoint 4441 __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset)); 4442 break; 4443 } 4444 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: 4445 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); 4446 break; 4447 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { 4448 DCHECK(GetCompilerOptions().IsBootImage()); 4449 // Add ADRP with its PC-relative method patch. 4450 vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod()); 4451 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4452 // Add ADD with its PC-relative method patch. 4453 vixl::aarch64::Label* add_label = 4454 NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label); 4455 EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); 4456 break; 4457 } 4458 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: 4459 // Load method address from literal pool. 4460 __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); 4461 break; 4462 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { 4463 // Add ADRP with its PC-relative DexCache access patch. 
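      // After linking, the ADRP/LDR placeholders below become, roughly (register choice
      // is illustrative only):
      //   adrp xN, <page of the method .bss entry>
      //   ldr  xN, [xN, <page offset of the method .bss entry>]
      // i.e. a PC-relative load of the resolved ArtMethod* from its .bss entry.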
4464 MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
4465 vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
4466 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4467 // Add LDR with its PC-relative DexCache access patch.
4468 vixl::aarch64::Label* ldr_label =
4469 NewMethodBssEntryPatch(target_method, adrp_label);
4470 EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4471 break;
4472 }
4473 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
4474 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4475 return; // No code pointer retrieval; the runtime performs the call directly.
4476 }
4477 }
4478
4479 switch (invoke->GetCodePtrLocation()) {
4480 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4481 {
4482 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4483 ExactAssemblyScope eas(GetVIXLAssembler(),
4484 kInstructionSize,
4485 CodeBufferCheckScope::kExactSize);
4486 __ bl(&frame_entry_label_);
4487 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4488 }
4489 break;
4490 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4491 // LR = callee_method->entry_point_from_quick_compiled_code_;
4492 __ Ldr(lr, MemOperand(
4493 XRegisterFrom(callee_method),
4494 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
4495 {
4496 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4497 ExactAssemblyScope eas(GetVIXLAssembler(),
4498 kInstructionSize,
4499 CodeBufferCheckScope::kExactSize);
4500 // lr()
4501 __ blr(lr);
4502 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4503 }
4504 break;
4505 }
4506
4507 DCHECK(!IsLeafMethod());
4508 }
4509
4510 void CodeGeneratorARM64::GenerateVirtualCall(
4511 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4512 // Use the calling convention instead of the location of the receiver, as
4513 // intrinsics may have put the receiver in a different register. In the intrinsics
4514 // slow path, the arguments have been moved to the right place, so here we are
4515 // guaranteed that the receiver is the first register of the calling convention.
4516 InvokeDexCallingConvention calling_convention;
4517 Register receiver = calling_convention.GetRegisterAt(0);
4518 Register temp = XRegisterFrom(temp_in);
4519 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4520 invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4521 Offset class_offset = mirror::Object::ClassOffset();
4522 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4523
4524 DCHECK(receiver.IsRegister());
4525
4526 {
4527 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4528 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4529 // /* HeapReference<Class> */ temp = receiver->klass_
4530 __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4531 MaybeRecordImplicitNullCheck(invoke);
4532 }
4533 // Instead of simply (possibly) unpoisoning `temp` here, we should
4534 // emit a read barrier for the previous class reference load. However this is not
4535 // required in practice, as this is an intermediate/temporary reference and because
4536 // the current concurrent copying collector keeps the from-space memory
4537 // intact/accessible until the end of the marking phase (the
4538 // concurrent copying collector may not in the future).
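  // When heap reference poisoning is enabled, references are stored in memory in poisoned
  // form (currently by negation), so the class pointer loaded above must be unpoisoned
  // before it is dereferenced; MaybeUnpoisonHeapReference() is a no-op otherwise.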
4539 GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); 4540 // temp = temp->GetMethodAt(method_offset); 4541 __ Ldr(temp, MemOperand(temp, method_offset)); 4542 // lr = temp->GetEntryPoint(); 4543 __ Ldr(lr, MemOperand(temp, entry_point.SizeValue())); 4544 { 4545 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 4546 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 4547 // lr(); 4548 __ blr(lr); 4549 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4550 } 4551 } 4552 4553 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 4554 HandleInvoke(invoke); 4555 } 4556 4557 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 4558 codegen_->GenerateInvokePolymorphicCall(invoke); 4559 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4560 } 4561 4562 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( 4563 MethodReference target_method, 4564 vixl::aarch64::Label* adrp_label) { 4565 return NewPcRelativePatch( 4566 target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_); 4567 } 4568 4569 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch( 4570 MethodReference target_method, 4571 vixl::aarch64::Label* adrp_label) { 4572 return NewPcRelativePatch( 4573 target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_); 4574 } 4575 4576 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch( 4577 const DexFile& dex_file, 4578 dex::TypeIndex type_index, 4579 vixl::aarch64::Label* adrp_label) { 4580 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_); 4581 } 4582 4583 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch( 4584 const DexFile& dex_file, 4585 dex::TypeIndex type_index, 4586 vixl::aarch64::Label* adrp_label) { 4587 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); 4588 } 4589 4590 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch( 4591 const DexFile& dex_file, 4592 dex::StringIndex string_index, 4593 vixl::aarch64::Label* adrp_label) { 4594 return NewPcRelativePatch( 4595 &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_); 4596 } 4597 4598 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( 4599 const DexFile& dex_file, 4600 dex::StringIndex string_index, 4601 vixl::aarch64::Label* adrp_label) { 4602 return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); 4603 } 4604 4605 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { 4606 baker_read_barrier_patches_.emplace_back(custom_data); 4607 return &baker_read_barrier_patches_.back().label; 4608 } 4609 4610 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( 4611 const DexFile* dex_file, 4612 uint32_t offset_or_index, 4613 vixl::aarch64::Label* adrp_label, 4614 ArenaDeque<PcRelativePatchInfo>* patches) { 4615 // Add a patch entry and return the label. 4616 patches->emplace_back(dex_file, offset_or_index); 4617 PcRelativePatchInfo* info = &patches->back(); 4618 vixl::aarch64::Label* label = &info->label; 4619 // If adrp_label is null, this is the ADRP patch and needs to point to its own label. 4620 info->pc_insn_label = (adrp_label != nullptr) ? 
adrp_label : label; 4621 return label; 4622 } 4623 4624 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( 4625 uint64_t address) { 4626 return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); 4627 } 4628 4629 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( 4630 const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { 4631 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); 4632 return jit_string_patches_.GetOrCreate( 4633 StringReference(&dex_file, string_index), 4634 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); 4635 } 4636 4637 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( 4638 const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { 4639 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); 4640 return jit_class_patches_.GetOrCreate( 4641 TypeReference(&dex_file, type_index), 4642 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); 4643 } 4644 4645 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, 4646 vixl::aarch64::Register reg) { 4647 DCHECK(reg.IsX()); 4648 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4649 __ Bind(fixup_label); 4650 __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0)); 4651 } 4652 4653 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, 4654 vixl::aarch64::Register out, 4655 vixl::aarch64::Register base) { 4656 DCHECK(out.IsX()); 4657 DCHECK(base.IsX()); 4658 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4659 __ Bind(fixup_label); 4660 __ add(out, base, Operand(/* offset placeholder */ 0)); 4661 } 4662 4663 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, 4664 vixl::aarch64::Register out, 4665 vixl::aarch64::Register base) { 4666 DCHECK(base.IsX()); 4667 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4668 __ Bind(fixup_label); 4669 __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); 4670 } 4671 4672 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> 4673 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( 4674 const ArenaDeque<PcRelativePatchInfo>& infos, 4675 ArenaVector<linker::LinkerPatch>* linker_patches) { 4676 for (const PcRelativePatchInfo& info : infos) { 4677 linker_patches->push_back(Factory(info.label.GetLocation(), 4678 info.target_dex_file, 4679 info.pc_insn_label->GetLocation(), 4680 info.offset_or_index)); 4681 } 4682 } 4683 4684 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { 4685 DCHECK(linker_patches->empty()); 4686 size_t size = 4687 boot_image_method_patches_.size() + 4688 method_bss_entry_patches_.size() + 4689 boot_image_type_patches_.size() + 4690 type_bss_entry_patches_.size() + 4691 boot_image_string_patches_.size() + 4692 string_bss_entry_patches_.size() + 4693 baker_read_barrier_patches_.size(); 4694 linker_patches->reserve(size); 4695 if (GetCompilerOptions().IsBootImage()) { 4696 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( 4697 boot_image_method_patches_, linker_patches); 4698 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( 4699 boot_image_type_patches_, linker_patches); 4700 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( 4701 boot_image_string_patches_, 
linker_patches); 4702 } else { 4703 DCHECK(boot_image_method_patches_.empty()); 4704 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( 4705 boot_image_type_patches_, linker_patches); 4706 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( 4707 boot_image_string_patches_, linker_patches); 4708 } 4709 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( 4710 method_bss_entry_patches_, linker_patches); 4711 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( 4712 type_bss_entry_patches_, linker_patches); 4713 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( 4714 string_bss_entry_patches_, linker_patches); 4715 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { 4716 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch( 4717 info.label.GetLocation(), info.custom_data)); 4718 } 4719 DCHECK_EQ(size, linker_patches->size()); 4720 } 4721 4722 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { 4723 return uint32_literals_.GetOrCreate( 4724 value, 4725 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); }); 4726 } 4727 4728 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) { 4729 return uint64_literals_.GetOrCreate( 4730 value, 4731 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); }); 4732 } 4733 4734 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 4735 // Explicit clinit checks triggered by static invokes must have been pruned by 4736 // art::PrepareForRegisterAllocation. 4737 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 4738 4739 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 4740 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4741 return; 4742 } 4743 4744 { 4745 // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there 4746 // are no pools emitted. 4747 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 4748 LocationSummary* locations = invoke->GetLocations(); 4749 codegen_->GenerateStaticOrDirectCall( 4750 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 4751 } 4752 4753 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4754 } 4755 4756 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 4757 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 4758 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4759 return; 4760 } 4761 4762 { 4763 // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there 4764 // are no pools emitted. 
4765 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 4766 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 4767 DCHECK(!codegen_->IsLeafMethod()); 4768 } 4769 4770 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4771 } 4772 4773 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( 4774 HLoadClass::LoadKind desired_class_load_kind) { 4775 switch (desired_class_load_kind) { 4776 case HLoadClass::LoadKind::kInvalid: 4777 LOG(FATAL) << "UNREACHABLE"; 4778 UNREACHABLE(); 4779 case HLoadClass::LoadKind::kReferrersClass: 4780 break; 4781 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 4782 case HLoadClass::LoadKind::kBootImageClassTable: 4783 case HLoadClass::LoadKind::kBssEntry: 4784 DCHECK(!Runtime::Current()->UseJitCompilation()); 4785 break; 4786 case HLoadClass::LoadKind::kJitTableAddress: 4787 DCHECK(Runtime::Current()->UseJitCompilation()); 4788 break; 4789 case HLoadClass::LoadKind::kBootImageAddress: 4790 case HLoadClass::LoadKind::kRuntimeCall: 4791 break; 4792 } 4793 return desired_class_load_kind; 4794 } 4795 4796 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { 4797 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 4798 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 4799 InvokeRuntimeCallingConvention calling_convention; 4800 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 4801 cls, 4802 LocationFrom(calling_convention.GetRegisterAt(0)), 4803 LocationFrom(vixl::aarch64::x0)); 4804 DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0)); 4805 return; 4806 } 4807 DCHECK(!cls->NeedsAccessCheck()); 4808 4809 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 4810 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 4811 ? LocationSummary::kCallOnSlowPath 4812 : LocationSummary::kNoCall; 4813 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); 4814 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 4815 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 4816 } 4817 4818 if (load_kind == HLoadClass::LoadKind::kReferrersClass) { 4819 locations->SetInAt(0, Location::RequiresRegister()); 4820 } 4821 locations->SetOut(Location::RequiresRegister()); 4822 if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { 4823 if (!kUseReadBarrier || kUseBakerReadBarrier) { 4824 // Rely on the type resolution or initialization and marking to save everything we need. 4825 RegisterSet caller_saves = RegisterSet::Empty(); 4826 InvokeRuntimeCallingConvention calling_convention; 4827 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 4828 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), 4829 RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), 4830 DataType::Type::kReference).GetCode()); 4831 locations->SetCustomSlowPathCallerSaves(caller_saves); 4832 } else { 4833 // For non-Baker read barrier we have a temp-clobbering call. 4834 } 4835 } 4836 } 4837 4838 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 4839 // move. 
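// Overview of the code generated below, per load kind:
//  - kReferrersClass:              load declaring_class_ from the current ArtMethod*.
//  - kBootImageLinkTimePcRelative: ADRP + ADD patched at link time to the class address.
//  - kBootImageAddress:            32-bit literal holding the boot image address.
//  - kBootImageClassTable:         ADRP + LDR from the class table slot, then clear the
//                                  hash bits stored in the slot.
//  - kBssEntry:                    ADRP + LDR of a GcRoot from the type .bss entry; the
//                                  entry may still be null, hence the null check and
//                                  LoadClassSlowPathARM64 at the end of this method.
//  - kJitTableAddress:             load through a literal in the JIT roots table.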
4840 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { 4841 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 4842 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 4843 codegen_->GenerateLoadClassRuntimeCall(cls); 4844 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4845 return; 4846 } 4847 DCHECK(!cls->NeedsAccessCheck()); 4848 4849 Location out_loc = cls->GetLocations()->Out(); 4850 Register out = OutputRegister(cls); 4851 4852 const ReadBarrierOption read_barrier_option = cls->IsInBootImage() 4853 ? kWithoutReadBarrier 4854 : kCompilerReadBarrierOption; 4855 bool generate_null_check = false; 4856 switch (load_kind) { 4857 case HLoadClass::LoadKind::kReferrersClass: { 4858 DCHECK(!cls->CanCallRuntime()); 4859 DCHECK(!cls->MustGenerateClinitCheck()); 4860 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 4861 Register current_method = InputRegisterAt(cls, 0); 4862 GenerateGcRootFieldLoad(cls, 4863 out_loc, 4864 current_method, 4865 ArtMethod::DeclaringClassOffset().Int32Value(), 4866 /* fixup_label */ nullptr, 4867 read_barrier_option); 4868 break; 4869 } 4870 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { 4871 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 4872 // Add ADRP with its PC-relative type patch. 4873 const DexFile& dex_file = cls->GetDexFile(); 4874 dex::TypeIndex type_index = cls->GetTypeIndex(); 4875 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); 4876 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4877 // Add ADD with its PC-relative type patch. 4878 vixl::aarch64::Label* add_label = 4879 codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); 4880 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); 4881 break; 4882 } 4883 case HLoadClass::LoadKind::kBootImageAddress: { 4884 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 4885 uint32_t address = dchecked_integral_cast<uint32_t>( 4886 reinterpret_cast<uintptr_t>(cls->GetClass().Get())); 4887 DCHECK_NE(address, 0u); 4888 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); 4889 break; 4890 } 4891 case HLoadClass::LoadKind::kBootImageClassTable: { 4892 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 4893 // Add ADRP with its PC-relative type patch. 4894 const DexFile& dex_file = cls->GetDexFile(); 4895 dex::TypeIndex type_index = cls->GetTypeIndex(); 4896 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); 4897 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4898 // Add LDR with its PC-relative type patch. 4899 vixl::aarch64::Label* ldr_label = 4900 codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); 4901 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); 4902 // Extract the reference from the slot data, i.e. clear the hash bits. 4903 int32_t masked_hash = ClassTable::TableSlot::MaskHash( 4904 ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index))); 4905 if (masked_hash != 0) { 4906 __ Sub(out.W(), out.W(), Operand(masked_hash)); 4907 } 4908 break; 4909 } 4910 case HLoadClass::LoadKind::kBssEntry: { 4911 // Add ADRP with its PC-relative Class .bss entry patch. 
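      // The type .bss entry starts out null and is only filled in once the class has been
      // resolved, so the GcRoot loaded below has to be null-checked and, if null, resolved
      // through LoadClassSlowPathARM64 (see generate_null_check below).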
4912 const DexFile& dex_file = cls->GetDexFile(); 4913 dex::TypeIndex type_index = cls->GetTypeIndex(); 4914 vixl::aarch64::Register temp = XRegisterFrom(out_loc); 4915 vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); 4916 codegen_->EmitAdrpPlaceholder(adrp_label, temp); 4917 // Add LDR with its PC-relative Class patch. 4918 vixl::aarch64::Label* ldr_label = 4919 codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); 4920 // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ 4921 GenerateGcRootFieldLoad(cls, 4922 out_loc, 4923 temp, 4924 /* offset placeholder */ 0u, 4925 ldr_label, 4926 read_barrier_option); 4927 generate_null_check = true; 4928 break; 4929 } 4930 case HLoadClass::LoadKind::kJitTableAddress: { 4931 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), 4932 cls->GetTypeIndex(), 4933 cls->GetClass())); 4934 GenerateGcRootFieldLoad(cls, 4935 out_loc, 4936 out.X(), 4937 /* offset */ 0, 4938 /* fixup_label */ nullptr, 4939 read_barrier_option); 4940 break; 4941 } 4942 case HLoadClass::LoadKind::kRuntimeCall: 4943 case HLoadClass::LoadKind::kInvalid: 4944 LOG(FATAL) << "UNREACHABLE"; 4945 UNREACHABLE(); 4946 } 4947 4948 bool do_clinit = cls->MustGenerateClinitCheck(); 4949 if (generate_null_check || do_clinit) { 4950 DCHECK(cls->CanCallRuntime()); 4951 SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64( 4952 cls, cls, cls->GetDexPc(), do_clinit); 4953 codegen_->AddSlowPath(slow_path); 4954 if (generate_null_check) { 4955 __ Cbz(out, slow_path->GetEntryLabel()); 4956 } 4957 if (cls->MustGenerateClinitCheck()) { 4958 GenerateClassInitializationCheck(slow_path, out); 4959 } else { 4960 __ Bind(slow_path->GetExitLabel()); 4961 } 4962 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 4963 } 4964 } 4965 4966 static MemOperand GetExceptionTlsAddress() { 4967 return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); 4968 } 4969 4970 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) { 4971 LocationSummary* locations = 4972 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); 4973 locations->SetOut(Location::RequiresRegister()); 4974 } 4975 4976 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) { 4977 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress()); 4978 } 4979 4980 void LocationsBuilderARM64::VisitClearException(HClearException* clear) { 4981 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); 4982 } 4983 4984 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 4985 __ Str(wzr, GetExceptionTlsAddress()); 4986 } 4987 4988 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( 4989 HLoadString::LoadKind desired_string_load_kind) { 4990 switch (desired_string_load_kind) { 4991 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 4992 case HLoadString::LoadKind::kBootImageInternTable: 4993 case HLoadString::LoadKind::kBssEntry: 4994 DCHECK(!Runtime::Current()->UseJitCompilation()); 4995 break; 4996 case HLoadString::LoadKind::kJitTableAddress: 4997 DCHECK(Runtime::Current()->UseJitCompilation()); 4998 break; 4999 case HLoadString::LoadKind::kBootImageAddress: 5000 case HLoadString::LoadKind::kRuntimeCall: 5001 break; 5002 } 5003 return desired_string_load_kind; 5004 } 5005 5006 void LocationsBuilderARM64::VisitLoadString(HLoadString* 
load) { 5007 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 5008 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); 5009 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { 5010 InvokeRuntimeCallingConvention calling_convention; 5011 locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); 5012 } else { 5013 locations->SetOut(Location::RequiresRegister()); 5014 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { 5015 if (!kUseReadBarrier || kUseBakerReadBarrier) { 5016 // Rely on the pResolveString and marking to save everything we need. 5017 RegisterSet caller_saves = RegisterSet::Empty(); 5018 InvokeRuntimeCallingConvention calling_convention; 5019 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 5020 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), 5021 RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), 5022 DataType::Type::kReference).GetCode()); 5023 locations->SetCustomSlowPathCallerSaves(caller_saves); 5024 } else { 5025 // For non-Baker read barrier we have a temp-clobbering call. 5026 } 5027 } 5028 } 5029 } 5030 5031 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 5032 // move. 5033 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { 5034 Register out = OutputRegister(load); 5035 Location out_loc = load->GetLocations()->Out(); 5036 5037 switch (load->GetLoadKind()) { 5038 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { 5039 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 5040 // Add ADRP with its PC-relative String patch. 5041 const DexFile& dex_file = load->GetDexFile(); 5042 const dex::StringIndex string_index = load->GetStringIndex(); 5043 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); 5044 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 5045 // Add ADD with its PC-relative String patch. 5046 vixl::aarch64::Label* add_label = 5047 codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); 5048 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); 5049 return; 5050 } 5051 case HLoadString::LoadKind::kBootImageAddress: { 5052 uint32_t address = dchecked_integral_cast<uint32_t>( 5053 reinterpret_cast<uintptr_t>(load->GetString().Get())); 5054 DCHECK_NE(address, 0u); 5055 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); 5056 return; 5057 } 5058 case HLoadString::LoadKind::kBootImageInternTable: { 5059 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 5060 // Add ADRP with its PC-relative String patch. 5061 const DexFile& dex_file = load->GetDexFile(); 5062 const dex::StringIndex string_index = load->GetStringIndex(); 5063 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); 5064 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 5065 // Add LDR with its PC-relative String patch. 5066 vixl::aarch64::Label* ldr_label = 5067 codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); 5068 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); 5069 return; 5070 } 5071 case HLoadString::LoadKind::kBssEntry: { 5072 // Add ADRP with its PC-relative String .bss entry patch. 
5073 const DexFile& dex_file = load->GetDexFile(); 5074 const dex::StringIndex string_index = load->GetStringIndex(); 5075 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 5076 Register temp = XRegisterFrom(out_loc); 5077 vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); 5078 codegen_->EmitAdrpPlaceholder(adrp_label, temp); 5079 // Add LDR with its .bss entry String patch. 5080 vixl::aarch64::Label* ldr_label = 5081 codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); 5082 // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ 5083 GenerateGcRootFieldLoad(load, 5084 out_loc, 5085 temp, 5086 /* offset placeholder */ 0u, 5087 ldr_label, 5088 kCompilerReadBarrierOption); 5089 SlowPathCodeARM64* slow_path = 5090 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); 5091 codegen_->AddSlowPath(slow_path); 5092 __ Cbz(out.X(), slow_path->GetEntryLabel()); 5093 __ Bind(slow_path->GetExitLabel()); 5094 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 5095 return; 5096 } 5097 case HLoadString::LoadKind::kJitTableAddress: { 5098 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), 5099 load->GetStringIndex(), 5100 load->GetString())); 5101 GenerateGcRootFieldLoad(load, 5102 out_loc, 5103 out.X(), 5104 /* offset */ 0, 5105 /* fixup_label */ nullptr, 5106 kCompilerReadBarrierOption); 5107 return; 5108 } 5109 default: 5110 break; 5111 } 5112 5113 // TODO: Re-add the compiler code to do string dex cache lookup again. 5114 InvokeRuntimeCallingConvention calling_convention; 5115 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); 5116 __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); 5117 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); 5118 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 5119 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 5120 } 5121 5122 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { 5123 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); 5124 locations->SetOut(Location::ConstantLocation(constant)); 5125 } 5126 5127 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 5128 // Will be generated at use site. 5129 } 5130 5131 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 5132 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5133 instruction, LocationSummary::kCallOnMainOnly); 5134 InvokeRuntimeCallingConvention calling_convention; 5135 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5136 } 5137 5138 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 5139 codegen_->InvokeRuntime(instruction->IsEnter() ? 
kQuickLockObject : kQuickUnlockObject, 5140 instruction, 5141 instruction->GetDexPc()); 5142 if (instruction->IsEnter()) { 5143 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 5144 } else { 5145 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 5146 } 5147 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 5148 } 5149 5150 void LocationsBuilderARM64::VisitMul(HMul* mul) { 5151 LocationSummary* locations = 5152 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); 5153 switch (mul->GetResultType()) { 5154 case DataType::Type::kInt32: 5155 case DataType::Type::kInt64: 5156 locations->SetInAt(0, Location::RequiresRegister()); 5157 locations->SetInAt(1, Location::RequiresRegister()); 5158 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5159 break; 5160 5161 case DataType::Type::kFloat32: 5162 case DataType::Type::kFloat64: 5163 locations->SetInAt(0, Location::RequiresFpuRegister()); 5164 locations->SetInAt(1, Location::RequiresFpuRegister()); 5165 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5166 break; 5167 5168 default: 5169 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 5170 } 5171 } 5172 5173 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) { 5174 switch (mul->GetResultType()) { 5175 case DataType::Type::kInt32: 5176 case DataType::Type::kInt64: 5177 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1)); 5178 break; 5179 5180 case DataType::Type::kFloat32: 5181 case DataType::Type::kFloat64: 5182 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1)); 5183 break; 5184 5185 default: 5186 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 5187 } 5188 } 5189 5190 void LocationsBuilderARM64::VisitNeg(HNeg* neg) { 5191 LocationSummary* locations = 5192 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); 5193 switch (neg->GetResultType()) { 5194 case DataType::Type::kInt32: 5195 case DataType::Type::kInt64: 5196 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg)); 5197 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5198 break; 5199 5200 case DataType::Type::kFloat32: 5201 case DataType::Type::kFloat64: 5202 locations->SetInAt(0, Location::RequiresFpuRegister()); 5203 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5204 break; 5205 5206 default: 5207 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 5208 } 5209 } 5210 5211 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) { 5212 switch (neg->GetResultType()) { 5213 case DataType::Type::kInt32: 5214 case DataType::Type::kInt64: 5215 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0)); 5216 break; 5217 5218 case DataType::Type::kFloat32: 5219 case DataType::Type::kFloat64: 5220 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0)); 5221 break; 5222 5223 default: 5224 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 5225 } 5226 } 5227 5228 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { 5229 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5230 instruction, LocationSummary::kCallOnMainOnly); 5231 InvokeRuntimeCallingConvention calling_convention; 5232 locations->SetOut(LocationFrom(x0)); 5233 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5234 locations->SetInAt(1, 
LocationFrom(calling_convention.GetRegisterAt(1)));
5235 }
5236
5237 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5238 // Note: if heap poisoning is enabled, the entry point takes care
5239 // of poisoning the reference.
5240 QuickEntrypointEnum entrypoint =
5241 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
5242 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5243 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5244 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5245 }
5246
5247 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5248 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5249 instruction, LocationSummary::kCallOnMainOnly);
5250 InvokeRuntimeCallingConvention calling_convention;
5251 if (instruction->IsStringAlloc()) {
5252 locations->AddTemp(LocationFrom(kArtMethodRegister));
5253 } else {
5254 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5255 }
5256 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5257 }
5258
5259 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5260 // Note: if heap poisoning is enabled, the entry point takes care
5261 // of poisoning the reference.
5262 if (instruction->IsStringAlloc()) {
5263 // String is allocated through StringFactory. Call NewEmptyString entry point.
5264 Location temp = instruction->GetLocations()->GetTemp(0);
5265 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5266 __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
5267 __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
5268
5269 {
5270 // Ensure the pc position is recorded immediately after the `blr` instruction.
5271 ExactAssemblyScope eas(GetVIXLAssembler(), 5272 kInstructionSize, 5273 CodeBufferCheckScope::kExactSize); 5274 __ blr(lr); 5275 codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); 5276 } 5277 } else { 5278 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); 5279 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 5280 } 5281 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 5282 } 5283 5284 void LocationsBuilderARM64::VisitNot(HNot* instruction) { 5285 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5286 locations->SetInAt(0, Location::RequiresRegister()); 5287 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5288 } 5289 5290 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) { 5291 switch (instruction->GetResultType()) { 5292 case DataType::Type::kInt32: 5293 case DataType::Type::kInt64: 5294 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0)); 5295 break; 5296 5297 default: 5298 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); 5299 } 5300 } 5301 5302 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) { 5303 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5304 locations->SetInAt(0, Location::RequiresRegister()); 5305 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5306 } 5307 5308 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) { 5309 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1)); 5310 } 5311 5312 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) { 5313 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 5314 locations->SetInAt(0, Location::RequiresRegister()); 5315 } 5316 5317 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { 5318 if (CanMoveNullCheckToUser(instruction)) { 5319 return; 5320 } 5321 { 5322 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
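    // The implicit null check relies on the load from [obj, #0] into wzr below: a null
    // `obj` faults, the faulting PC is matched against the stack map recorded here, and
    // the runtime's fault handler turns the signal into a NullPointerException.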
5323 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 5324 Location obj = instruction->GetLocations()->InAt(0); 5325 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); 5326 RecordPcInfo(instruction, instruction->GetDexPc()); 5327 } 5328 } 5329 5330 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) { 5331 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction); 5332 AddSlowPath(slow_path); 5333 5334 LocationSummary* locations = instruction->GetLocations(); 5335 Location obj = locations->InAt(0); 5336 5337 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); 5338 } 5339 5340 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { 5341 codegen_->GenerateNullCheck(instruction); 5342 } 5343 5344 void LocationsBuilderARM64::VisitOr(HOr* instruction) { 5345 HandleBinaryOp(instruction); 5346 } 5347 5348 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { 5349 HandleBinaryOp(instruction); 5350 } 5351 5352 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 5353 LOG(FATAL) << "Unreachable"; 5354 } 5355 5356 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) { 5357 if (instruction->GetNext()->IsSuspendCheck() && 5358 instruction->GetBlock()->GetLoopInformation() != nullptr) { 5359 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); 5360 // The back edge will generate the suspend check. 5361 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); 5362 } 5363 5364 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 5365 } 5366 5367 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { 5368 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5369 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 5370 if (location.IsStackSlot()) { 5371 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5372 } else if (location.IsDoubleStackSlot()) { 5373 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5374 } 5375 locations->SetOut(location); 5376 } 5377 5378 void InstructionCodeGeneratorARM64::VisitParameterValue( 5379 HParameterValue* instruction ATTRIBUTE_UNUSED) { 5380 // Nothing to do, the parameter is already at its location. 5381 } 5382 5383 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { 5384 LocationSummary* locations = 5385 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5386 locations->SetOut(LocationFrom(kArtMethodRegister)); 5387 } 5388 5389 void InstructionCodeGeneratorARM64::VisitCurrentMethod( 5390 HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 5391 // Nothing to do, the method is already at its location. 
5392 } 5393 5394 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) { 5395 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5396 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { 5397 locations->SetInAt(i, Location::Any()); 5398 } 5399 locations->SetOut(Location::Any()); 5400 } 5401 5402 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 5403 LOG(FATAL) << "Unreachable"; 5404 } 5405 5406 void LocationsBuilderARM64::VisitRem(HRem* rem) { 5407 DataType::Type type = rem->GetResultType(); 5408 LocationSummary::CallKind call_kind = 5409 DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly 5410 : LocationSummary::kNoCall; 5411 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); 5412 5413 switch (type) { 5414 case DataType::Type::kInt32: 5415 case DataType::Type::kInt64: 5416 locations->SetInAt(0, Location::RequiresRegister()); 5417 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); 5418 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5419 break; 5420 5421 case DataType::Type::kFloat32: 5422 case DataType::Type::kFloat64: { 5423 InvokeRuntimeCallingConvention calling_convention; 5424 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 5425 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); 5426 locations->SetOut(calling_convention.GetReturnLocation(type)); 5427 5428 break; 5429 } 5430 5431 default: 5432 LOG(FATAL) << "Unexpected rem type " << type; 5433 } 5434 } 5435 5436 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { 5437 DataType::Type type = rem->GetResultType(); 5438 5439 switch (type) { 5440 case DataType::Type::kInt32: 5441 case DataType::Type::kInt64: { 5442 GenerateDivRemIntegral(rem); 5443 break; 5444 } 5445 5446 case DataType::Type::kFloat32: 5447 case DataType::Type::kFloat64: { 5448 QuickEntrypointEnum entrypoint = 5449 (type == DataType::Type::kFloat32) ? 
kQuickFmodf : kQuickFmod; 5450 codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc()); 5451 if (type == DataType::Type::kFloat32) { 5452 CheckEntrypointTypes<kQuickFmodf, float, float, float>(); 5453 } else { 5454 CheckEntrypointTypes<kQuickFmod, double, double, double>(); 5455 } 5456 break; 5457 } 5458 5459 default: 5460 LOG(FATAL) << "Unexpected rem type " << type; 5461 UNREACHABLE(); 5462 } 5463 } 5464 5465 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { 5466 constructor_fence->SetLocations(nullptr); 5467 } 5468 5469 void InstructionCodeGeneratorARM64::VisitConstructorFence( 5470 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 5471 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 5472 } 5473 5474 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 5475 memory_barrier->SetLocations(nullptr); 5476 } 5477 5478 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 5479 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 5480 } 5481 5482 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { 5483 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5484 DataType::Type return_type = instruction->InputAt(0)->GetType(); 5485 locations->SetInAt(0, ARM64ReturnLocation(return_type)); 5486 } 5487 5488 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) { 5489 codegen_->GenerateFrameExit(); 5490 } 5491 5492 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { 5493 instruction->SetLocations(nullptr); 5494 } 5495 5496 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) { 5497 codegen_->GenerateFrameExit(); 5498 } 5499 5500 void LocationsBuilderARM64::VisitRor(HRor* ror) { 5501 HandleBinaryOp(ror); 5502 } 5503 5504 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) { 5505 HandleBinaryOp(ror); 5506 } 5507 5508 void LocationsBuilderARM64::VisitShl(HShl* shl) { 5509 HandleShift(shl); 5510 } 5511 5512 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) { 5513 HandleShift(shl); 5514 } 5515 5516 void LocationsBuilderARM64::VisitShr(HShr* shr) { 5517 HandleShift(shr); 5518 } 5519 5520 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) { 5521 HandleShift(shr); 5522 } 5523 5524 void LocationsBuilderARM64::VisitSub(HSub* instruction) { 5525 HandleBinaryOp(instruction); 5526 } 5527 5528 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) { 5529 HandleBinaryOp(instruction); 5530 } 5531 5532 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5533 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5534 } 5535 5536 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5537 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5538 } 5539 5540 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5541 HandleFieldSet(instruction); 5542 } 5543 5544 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5545 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 5546 } 5547 5548 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet( 5549 HUnresolvedInstanceFieldGet* instruction) { 5550 FieldAccessCallingConventionARM64 calling_convention; 5551 codegen_->CreateUnresolvedFieldLocationSummary( 5552 instruction, instruction->GetFieldType(), 
calling_convention); 5553 } 5554 5555 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet( 5556 HUnresolvedInstanceFieldGet* instruction) { 5557 FieldAccessCallingConventionARM64 calling_convention; 5558 codegen_->GenerateUnresolvedFieldAccess(instruction, 5559 instruction->GetFieldType(), 5560 instruction->GetFieldIndex(), 5561 instruction->GetDexPc(), 5562 calling_convention); 5563 } 5564 5565 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet( 5566 HUnresolvedInstanceFieldSet* instruction) { 5567 FieldAccessCallingConventionARM64 calling_convention; 5568 codegen_->CreateUnresolvedFieldLocationSummary( 5569 instruction, instruction->GetFieldType(), calling_convention); 5570 } 5571 5572 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet( 5573 HUnresolvedInstanceFieldSet* instruction) { 5574 FieldAccessCallingConventionARM64 calling_convention; 5575 codegen_->GenerateUnresolvedFieldAccess(instruction, 5576 instruction->GetFieldType(), 5577 instruction->GetFieldIndex(), 5578 instruction->GetDexPc(), 5579 calling_convention); 5580 } 5581 5582 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet( 5583 HUnresolvedStaticFieldGet* instruction) { 5584 FieldAccessCallingConventionARM64 calling_convention; 5585 codegen_->CreateUnresolvedFieldLocationSummary( 5586 instruction, instruction->GetFieldType(), calling_convention); 5587 } 5588 5589 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet( 5590 HUnresolvedStaticFieldGet* instruction) { 5591 FieldAccessCallingConventionARM64 calling_convention; 5592 codegen_->GenerateUnresolvedFieldAccess(instruction, 5593 instruction->GetFieldType(), 5594 instruction->GetFieldIndex(), 5595 instruction->GetDexPc(), 5596 calling_convention); 5597 } 5598 5599 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet( 5600 HUnresolvedStaticFieldSet* instruction) { 5601 FieldAccessCallingConventionARM64 calling_convention; 5602 codegen_->CreateUnresolvedFieldLocationSummary( 5603 instruction, instruction->GetFieldType(), calling_convention); 5604 } 5605 5606 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet( 5607 HUnresolvedStaticFieldSet* instruction) { 5608 FieldAccessCallingConventionARM64 calling_convention; 5609 codegen_->GenerateUnresolvedFieldAccess(instruction, 5610 instruction->GetFieldType(), 5611 instruction->GetFieldIndex(), 5612 instruction->GetDexPc(), 5613 calling_convention); 5614 } 5615 5616 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { 5617 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5618 instruction, LocationSummary::kCallOnSlowPath); 5619 // In suspend check slow path, usually there are no caller-save registers at all. 5620 // If SIMD instructions are present, however, we force spilling all live SIMD 5621 // registers in full width (since the runtime only saves/restores lower part). 5622 locations->SetCustomSlowPathCallerSaves( 5623 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); 5624 } 5625 5626 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) { 5627 HBasicBlock* block = instruction->GetBlock(); 5628 if (block->GetLoopInformation() != nullptr) { 5629 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 5630 // The back edge will generate the suspend check. 5631 return; 5632 } 5633 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 5634 // The goto will generate the suspend check. 
5635 return; 5636 } 5637 GenerateSuspendCheck(instruction, nullptr); 5638 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 5639 } 5640 5641 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { 5642 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5643 instruction, LocationSummary::kCallOnMainOnly); 5644 InvokeRuntimeCallingConvention calling_convention; 5645 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5646 } 5647 5648 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { 5649 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 5650 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 5651 } 5652 5653 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { 5654 LocationSummary* locations = 5655 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall); 5656 DataType::Type input_type = conversion->GetInputType(); 5657 DataType::Type result_type = conversion->GetResultType(); 5658 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 5659 << input_type << " -> " << result_type; 5660 if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || 5661 (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { 5662 LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; 5663 } 5664 5665 if (DataType::IsFloatingPointType(input_type)) { 5666 locations->SetInAt(0, Location::RequiresFpuRegister()); 5667 } else { 5668 locations->SetInAt(0, Location::RequiresRegister()); 5669 } 5670 5671 if (DataType::IsFloatingPointType(result_type)) { 5672 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5673 } else { 5674 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5675 } 5676 } 5677 5678 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) { 5679 DataType::Type result_type = conversion->GetResultType(); 5680 DataType::Type input_type = conversion->GetInputType(); 5681 5682 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 5683 << input_type << " -> " << result_type; 5684 5685 if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { 5686 int result_size = DataType::Size(result_type); 5687 int input_size = DataType::Size(input_type); 5688 int min_size = std::min(result_size, input_size); 5689 Register output = OutputRegister(conversion); 5690 Register source = InputRegisterAt(conversion, 0); 5691 if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) { 5692 // 'int' values are used directly as W registers, discarding the top 5693 // bits, so we don't need to sign-extend and can just perform a move. 5694 // We do not pass the `kDiscardForSameWReg` argument to force clearing the 5695 // top 32 bits of the target register. We theoretically could leave those 5696 // bits unchanged, but we would have to make sure that no code uses a 5697 // 32bit input value as a 64bit value assuming that the top 32 bits are 5698 // zero. 5699 __ Mov(output.W(), source.W()); 5700 } else if (DataType::IsUnsignedType(result_type) || 5701 (DataType::IsUnsignedType(input_type) && input_size < result_size)) { 5702 __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte); 5703 } else { 5704 __ Sbfx(output, output.IsX() ? 
source.X() : source.W(), 0, min_size * kBitsPerByte);
5705     }
5706   } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
5707     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
5708   } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
5709     CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
5710     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
5711   } else if (DataType::IsFloatingPointType(result_type) &&
5712              DataType::IsFloatingPointType(input_type)) {
5713     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
5714   } else {
5715     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
5716                << " to " << result_type;
5717   }
5718 }
5719
5720 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
5721   HandleShift(ushr);
5722 }
5723
5724 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
5725   HandleShift(ushr);
5726 }
5727
5728 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
5729   HandleBinaryOp(instruction);
5730 }
5731
5732 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
5733   HandleBinaryOp(instruction);
5734 }
5735
5736 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5737   // Nothing to do, this should be removed during prepare for register allocator.
5738   LOG(FATAL) << "Unreachable";
5739 }
5740
5741 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5742   // Nothing to do, this should be removed during prepare for register allocator.
5743   LOG(FATAL) << "Unreachable";
5744 }
5745
5746 // Simple implementation of packed switch - generate cascaded compare/jumps.
5747 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5748   LocationSummary* locations =
5749       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
5750   locations->SetInAt(0, Location::RequiresRegister());
5751 }
5752
5753 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5754   int32_t lower_bound = switch_instr->GetStartValue();
5755   uint32_t num_entries = switch_instr->GetNumEntries();
5756   Register value_reg = InputRegisterAt(switch_instr, 0);
5757   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
5758
5759   // Roughly assume a maximum average of 16 assembly instructions generated per HIR in a graph.
5760   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
5761   // ADR has a limited range (+/- 1 MB), so we set a threshold for the number of HIRs in the graph
5762   // to make sure we don't emit it if the target may run out of range.
5763   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
5764   // ranges and emit the tables only as required.
5765   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
5766
5767   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
5768       // Current instruction id is an upper bound of the number of HIRs in the graph.
5769       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
5770     // Create a series of compare/jumps.
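    // Illustrative sketch only (register and label names below are placeholders, not part of
    // the generated code): a packed switch with four entries lowered through this path comes
    // out roughly as
    //   subs w_tmp, w_value, #lower_bound
    //   b.eq case_0
    //   subs w_tmp, w_tmp, #2
    //   b.lo case_1
    //   b.eq case_2
    //   cmp  w_tmp, #1
    //   b.eq case_3
    //   b    default        // Omitted when the default block is the fall-through successor.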
5771 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); 5772 Register temp = temps.AcquireW(); 5773 __ Subs(temp, value_reg, Operand(lower_bound)); 5774 5775 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); 5776 // Jump to successors[0] if value == lower_bound. 5777 __ B(eq, codegen_->GetLabelOf(successors[0])); 5778 int32_t last_index = 0; 5779 for (; num_entries - last_index > 2; last_index += 2) { 5780 __ Subs(temp, temp, Operand(2)); 5781 // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. 5782 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1])); 5783 // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. 5784 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2])); 5785 } 5786 if (num_entries - last_index == 2) { 5787 // The last missing case_value. 5788 __ Cmp(temp, Operand(1)); 5789 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1])); 5790 } 5791 5792 // And the default for any other value. 5793 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { 5794 __ B(codegen_->GetLabelOf(default_block)); 5795 } 5796 } else { 5797 JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr); 5798 5799 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); 5800 5801 // Below instructions should use at most one blocked register. Since there are two blocked 5802 // registers, we are free to block one. 5803 Register temp_w = temps.AcquireW(); 5804 Register index; 5805 // Remove the bias. 5806 if (lower_bound != 0) { 5807 index = temp_w; 5808 __ Sub(index, value_reg, Operand(lower_bound)); 5809 } else { 5810 index = value_reg; 5811 } 5812 5813 // Jump to default block if index is out of the range. 5814 __ Cmp(index, Operand(num_entries)); 5815 __ B(hs, codegen_->GetLabelOf(default_block)); 5816 5817 // In current VIXL implementation, it won't require any blocked registers to encode the 5818 // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the 5819 // register pressure. 5820 Register table_base = temps.AcquireX(); 5821 // Load jump offset from the table. 5822 __ Adr(table_base, jump_table->GetTableStartLabel()); 5823 Register jump_offset = temp_w; 5824 __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2)); 5825 5826 // Jump to target block by branching to table_base(pc related) + offset. 5827 Register target_address = table_base; 5828 __ Add(target_address, table_base, Operand(jump_offset, SXTW)); 5829 __ Br(target_address); 5830 } 5831 } 5832 5833 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( 5834 HInstruction* instruction, 5835 Location out, 5836 uint32_t offset, 5837 Location maybe_temp, 5838 ReadBarrierOption read_barrier_option) { 5839 DataType::Type type = DataType::Type::kReference; 5840 Register out_reg = RegisterFrom(out, type); 5841 if (read_barrier_option == kWithReadBarrier) { 5842 CHECK(kEmitCompilerReadBarrier); 5843 if (kUseBakerReadBarrier) { 5844 // Load with fast path based Baker's read barrier. 5845 // /* HeapReference<Object> */ out = *(out + offset) 5846 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, 5847 out, 5848 out_reg, 5849 offset, 5850 maybe_temp, 5851 /* needs_null_check */ false, 5852 /* use_load_acquire */ false); 5853 } else { 5854 // Load with slow path based read barrier. 5855 // Save the value of `out` into `maybe_temp` before overwriting it 5856 // in the following move operation, as we will need it for the 5857 // read barrier below. 
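      // A minimal sketch of the pattern emitted below (register names are illustrative):
      //   mov w_temp, w_out            // Preserve the holder reference in `maybe_temp`.
      //   ldr w_out, [x_out, #offset]  // Original reference load.
      //   <slow-path read barrier on `out`, with `maybe_temp` passed as the holder>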
5858 Register temp_reg = RegisterFrom(maybe_temp, type); 5859 __ Mov(temp_reg, out_reg); 5860 // /* HeapReference<Object> */ out = *(out + offset) 5861 __ Ldr(out_reg, HeapOperand(out_reg, offset)); 5862 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); 5863 } 5864 } else { 5865 // Plain load with no read barrier. 5866 // /* HeapReference<Object> */ out = *(out + offset) 5867 __ Ldr(out_reg, HeapOperand(out_reg, offset)); 5868 GetAssembler()->MaybeUnpoisonHeapReference(out_reg); 5869 } 5870 } 5871 5872 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( 5873 HInstruction* instruction, 5874 Location out, 5875 Location obj, 5876 uint32_t offset, 5877 Location maybe_temp, 5878 ReadBarrierOption read_barrier_option) { 5879 DataType::Type type = DataType::Type::kReference; 5880 Register out_reg = RegisterFrom(out, type); 5881 Register obj_reg = RegisterFrom(obj, type); 5882 if (read_barrier_option == kWithReadBarrier) { 5883 CHECK(kEmitCompilerReadBarrier); 5884 if (kUseBakerReadBarrier) { 5885 // Load with fast path based Baker's read barrier. 5886 // /* HeapReference<Object> */ out = *(obj + offset) 5887 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, 5888 out, 5889 obj_reg, 5890 offset, 5891 maybe_temp, 5892 /* needs_null_check */ false, 5893 /* use_load_acquire */ false); 5894 } else { 5895 // Load with slow path based read barrier. 5896 // /* HeapReference<Object> */ out = *(obj + offset) 5897 __ Ldr(out_reg, HeapOperand(obj_reg, offset)); 5898 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); 5899 } 5900 } else { 5901 // Plain load with no read barrier. 5902 // /* HeapReference<Object> */ out = *(obj + offset) 5903 __ Ldr(out_reg, HeapOperand(obj_reg, offset)); 5904 GetAssembler()->MaybeUnpoisonHeapReference(out_reg); 5905 } 5906 } 5907 5908 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( 5909 HInstruction* instruction, 5910 Location root, 5911 Register obj, 5912 uint32_t offset, 5913 vixl::aarch64::Label* fixup_label, 5914 ReadBarrierOption read_barrier_option) { 5915 DCHECK(fixup_label == nullptr || offset == 0u); 5916 Register root_reg = RegisterFrom(root, DataType::Type::kReference); 5917 if (read_barrier_option == kWithReadBarrier) { 5918 DCHECK(kEmitCompilerReadBarrier); 5919 if (kUseBakerReadBarrier) { 5920 // Fast path implementation of art::ReadBarrier::BarrierForRoot when 5921 // Baker's read barrier are used. 5922 if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && 5923 !Runtime::Current()->UseJitCompilation()) { 5924 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in 5925 // the Marking Register) to decide whether we need to enter 5926 // the slow path to mark the GC root. 5927 // 5928 // We use link-time generated thunks for the slow path. That thunk 5929 // checks the reference and jumps to the entrypoint if needed. 5930 // 5931 // lr = &return_address; 5932 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
5933 // if (mr) { // Thread::Current()->GetIsGcMarking() 5934 // goto gc_root_thunk<root_reg>(lr) 5935 // } 5936 // return_address: 5937 5938 UseScratchRegisterScope temps(GetVIXLAssembler()); 5939 DCHECK(temps.IsAvailable(ip0)); 5940 DCHECK(temps.IsAvailable(ip1)); 5941 temps.Exclude(ip0, ip1); 5942 uint32_t custom_data = 5943 linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); 5944 vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data); 5945 5946 EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); 5947 vixl::aarch64::Label return_address; 5948 __ adr(lr, &return_address); 5949 if (fixup_label != nullptr) { 5950 __ Bind(fixup_label); 5951 } 5952 static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, 5953 "GC root LDR must be 2 instruction (8B) before the return address label."); 5954 __ ldr(root_reg, MemOperand(obj.X(), offset)); 5955 __ Bind(cbnz_label); 5956 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. 5957 __ Bind(&return_address); 5958 } else { 5959 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in 5960 // the Marking Register) to decide whether we need to enter 5961 // the slow path to mark the GC root. 5962 // 5963 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 5964 // if (mr) { // Thread::Current()->GetIsGcMarking() 5965 // // Slow path. 5966 // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() 5967 // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call. 5968 // } 5969 5970 // Slow path marking the GC root `root`. The entrypoint will 5971 // be loaded by the slow path code. 5972 SlowPathCodeARM64* slow_path = 5973 new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root); 5974 codegen_->AddSlowPath(slow_path); 5975 5976 // /* GcRoot<mirror::Object> */ root = *(obj + offset) 5977 if (fixup_label == nullptr) { 5978 __ Ldr(root_reg, MemOperand(obj, offset)); 5979 } else { 5980 codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); 5981 } 5982 static_assert( 5983 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), 5984 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " 5985 "have different sizes."); 5986 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), 5987 "art::mirror::CompressedReference<mirror::Object> and int32_t " 5988 "have different sizes."); 5989 5990 __ Cbnz(mr, slow_path->GetEntryLabel()); 5991 __ Bind(slow_path->GetExitLabel()); 5992 } 5993 } else { 5994 // GC root loaded through a slow path for read barriers other 5995 // than Baker's. 5996 // /* GcRoot<mirror::Object>* */ root = obj + offset 5997 if (fixup_label == nullptr) { 5998 __ Add(root_reg.X(), obj.X(), offset); 5999 } else { 6000 codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); 6001 } 6002 // /* mirror::Object* */ root = root->Read() 6003 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); 6004 } 6005 } else { 6006 // Plain GC root load with no read barrier. 6007 // /* GcRoot<mirror::Object> */ root = *(obj + offset) 6008 if (fixup_label == nullptr) { 6009 __ Ldr(root_reg, MemOperand(obj, offset)); 6010 } else { 6011 codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); 6012 } 6013 // Note that GC roots are not affected by heap poisoning, thus we 6014 // do not have to unpoison `root_reg` here. 
6015 } 6016 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 6017 } 6018 6019 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 6020 Location ref, 6021 Register obj, 6022 uint32_t offset, 6023 Location maybe_temp, 6024 bool needs_null_check, 6025 bool use_load_acquire) { 6026 DCHECK(kEmitCompilerReadBarrier); 6027 DCHECK(kUseBakerReadBarrier); 6028 6029 if (kBakerReadBarrierLinkTimeThunksEnableForFields && 6030 !use_load_acquire && 6031 !Runtime::Current()->UseJitCompilation()) { 6032 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the 6033 // Marking Register) to decide whether we need to enter the slow 6034 // path to mark the reference. Then, in the slow path, check the 6035 // gray bit in the lock word of the reference's holder (`obj`) to 6036 // decide whether to mark `ref` or not. 6037 // 6038 // We use link-time generated thunks for the slow path. That thunk checks 6039 // the holder and jumps to the entrypoint if needed. If the holder is not 6040 // gray, it creates a fake dependency and returns to the LDR instruction. 6041 // 6042 // lr = &gray_return_address; 6043 // if (mr) { // Thread::Current()->GetIsGcMarking() 6044 // goto field_thunk<holder_reg, base_reg>(lr) 6045 // } 6046 // not_gray_return_address: 6047 // // Original reference load. If the offset is too large to fit 6048 // // into LDR, we use an adjusted base register here. 6049 // HeapReference<mirror::Object> reference = *(obj+offset); 6050 // gray_return_address: 6051 6052 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); 6053 Register base = obj; 6054 if (offset >= kReferenceLoadMinFarOffset) { 6055 DCHECK(maybe_temp.IsRegister()); 6056 base = WRegisterFrom(maybe_temp); 6057 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); 6058 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); 6059 offset &= (kReferenceLoadMinFarOffset - 1u); 6060 } 6061 UseScratchRegisterScope temps(GetVIXLAssembler()); 6062 DCHECK(temps.IsAvailable(ip0)); 6063 DCHECK(temps.IsAvailable(ip1)); 6064 temps.Exclude(ip0, ip1); 6065 uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData( 6066 base.GetCode(), 6067 obj.GetCode()); 6068 vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); 6069 6070 { 6071 EmissionCheckScope guard(GetVIXLAssembler(), 6072 (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); 6073 vixl::aarch64::Label return_address; 6074 __ adr(lr, &return_address); 6075 __ Bind(cbnz_label); 6076 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. 6077 static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? 
-8 : -4), 6078 "Field LDR must be 1 instruction (4B) before the return address label; " 6079 " 2 instructions (8B) for heap poisoning."); 6080 Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); 6081 __ ldr(ref_reg, MemOperand(base.X(), offset)); 6082 if (needs_null_check) { 6083 MaybeRecordImplicitNullCheck(instruction); 6084 } 6085 GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); 6086 __ Bind(&return_address); 6087 } 6088 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); 6089 return; 6090 } 6091 6092 // /* HeapReference<Object> */ ref = *(obj + offset) 6093 Register temp = WRegisterFrom(maybe_temp); 6094 Location no_index = Location::NoLocation(); 6095 size_t no_scale_factor = 0u; 6096 GenerateReferenceLoadWithBakerReadBarrier(instruction, 6097 ref, 6098 obj, 6099 offset, 6100 no_index, 6101 no_scale_factor, 6102 temp, 6103 needs_null_check, 6104 use_load_acquire); 6105 } 6106 6107 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, 6108 Location ref, 6109 Register obj, 6110 uint32_t data_offset, 6111 Location index, 6112 Register temp, 6113 bool needs_null_check) { 6114 DCHECK(kEmitCompilerReadBarrier); 6115 DCHECK(kUseBakerReadBarrier); 6116 6117 static_assert( 6118 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 6119 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 6120 size_t scale_factor = DataType::SizeShift(DataType::Type::kReference); 6121 6122 if (kBakerReadBarrierLinkTimeThunksEnableForArrays && 6123 !Runtime::Current()->UseJitCompilation()) { 6124 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the 6125 // Marking Register) to decide whether we need to enter the slow 6126 // path to mark the reference. Then, in the slow path, check the 6127 // gray bit in the lock word of the reference's holder (`obj`) to 6128 // decide whether to mark `ref` or not. 6129 // 6130 // We use link-time generated thunks for the slow path. That thunk checks 6131 // the holder and jumps to the entrypoint if needed. If the holder is not 6132 // gray, it creates a fake dependency and returns to the LDR instruction. 6133 // 6134 // lr = &gray_return_address; 6135 // if (mr) { // Thread::Current()->GetIsGcMarking() 6136 // goto array_thunk<base_reg>(lr) 6137 // } 6138 // not_gray_return_address: 6139 // // Original reference load. If the offset is too large to fit 6140 // // into LDR, we use an adjusted base register here. 6141 // HeapReference<mirror::Object> reference = data[index]; 6142 // gray_return_address: 6143 6144 DCHECK(index.IsValid()); 6145 Register index_reg = RegisterFrom(index, DataType::Type::kInt32); 6146 Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); 6147 6148 UseScratchRegisterScope temps(GetVIXLAssembler()); 6149 DCHECK(temps.IsAvailable(ip0)); 6150 DCHECK(temps.IsAvailable(ip1)); 6151 temps.Exclude(ip0, ip1); 6152 uint32_t custom_data = 6153 linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); 6154 vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); 6155 6156 __ Add(temp.X(), obj.X(), Operand(data_offset)); 6157 { 6158 EmissionCheckScope guard(GetVIXLAssembler(), 6159 (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); 6160 vixl::aarch64::Label return_address; 6161 __ adr(lr, &return_address); 6162 __ Bind(cbnz_label); 6163 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. 
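      // Sketch of the sequence emitted by this scope (one extra unpoisoning instruction follows
      // the LDR when kPoisonHeapReferences is enabled; register names are illustrative):
      //   adr  lr, return_address
      //   cbnz mr, <array thunk>                 // Placeholder, patched at link time.
      //   ldr  w_ref, [x_temp, x_index, lsl #2]
      // return_address: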
6164 static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), 6165 "Array LDR must be 1 instruction (4B) before the return address label; " 6166 " 2 instructions (8B) for heap poisoning."); 6167 __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); 6168 DCHECK(!needs_null_check); // The thunk cannot handle the null check. 6169 GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); 6170 __ Bind(&return_address); 6171 } 6172 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); 6173 return; 6174 } 6175 6176 // Array cells are never volatile variables, therefore array loads 6177 // never use Load-Acquire instructions on ARM64. 6178 const bool use_load_acquire = false; 6179 6180 // /* HeapReference<Object> */ ref = 6181 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 6182 GenerateReferenceLoadWithBakerReadBarrier(instruction, 6183 ref, 6184 obj, 6185 data_offset, 6186 index, 6187 scale_factor, 6188 temp, 6189 needs_null_check, 6190 use_load_acquire); 6191 } 6192 6193 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, 6194 Location ref, 6195 Register obj, 6196 uint32_t offset, 6197 Location index, 6198 size_t scale_factor, 6199 Register temp, 6200 bool needs_null_check, 6201 bool use_load_acquire) { 6202 DCHECK(kEmitCompilerReadBarrier); 6203 DCHECK(kUseBakerReadBarrier); 6204 // If we are emitting an array load, we should not be using a 6205 // Load Acquire instruction. In other words: 6206 // `instruction->IsArrayGet()` => `!use_load_acquire`. 6207 DCHECK(!instruction->IsArrayGet() || !use_load_acquire); 6208 6209 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the 6210 // Marking Register) to decide whether we need to enter the slow 6211 // path to mark the reference. Then, in the slow path, check the 6212 // gray bit in the lock word of the reference's holder (`obj`) to 6213 // decide whether to mark `ref` or not. 6214 // 6215 // if (mr) { // Thread::Current()->GetIsGcMarking() 6216 // // Slow path. 6217 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); 6218 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 6219 // HeapReference<mirror::Object> ref = *src; // Original reference load. 6220 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 6221 // if (is_gray) { 6222 // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() 6223 // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. 6224 // } 6225 // } else { 6226 // HeapReference<mirror::Object> ref = *src; // Original reference load. 6227 // } 6228 6229 // Slow path marking the object `ref` when the GC is marking. The 6230 // entrypoint will be loaded by the slow path code. 6231 SlowPathCodeARM64* slow_path = 6232 new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64( 6233 instruction, 6234 ref, 6235 obj, 6236 offset, 6237 index, 6238 scale_factor, 6239 needs_null_check, 6240 use_load_acquire, 6241 temp); 6242 AddSlowPath(slow_path); 6243 6244 __ Cbnz(mr, slow_path->GetEntryLabel()); 6245 // Fast path: the GC is not marking: just load the reference. 
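  // Overall shape of what is emitted here (a sketch; label names are illustrative):
  //   cbnz mr, slow_path              // GC is marking: mark `ref` via the slow path.
  //   <raw reference load>            // Fast path, emitted just below.
  // slow_path_exit: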
6246 GenerateRawReferenceLoad( 6247 instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); 6248 __ Bind(slow_path->GetExitLabel()); 6249 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 6250 } 6251 6252 void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, 6253 Location ref, 6254 Register obj, 6255 Location field_offset, 6256 Register temp, 6257 bool needs_null_check, 6258 bool use_load_acquire) { 6259 DCHECK(kEmitCompilerReadBarrier); 6260 DCHECK(kUseBakerReadBarrier); 6261 // If we are emitting an array load, we should not be using a 6262 // Load Acquire instruction. In other words: 6263 // `instruction->IsArrayGet()` => `!use_load_acquire`. 6264 DCHECK(!instruction->IsArrayGet() || !use_load_acquire); 6265 6266 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the 6267 // Marking Register) to decide whether we need to enter the slow 6268 // path to update the reference field within `obj`. Then, in the 6269 // slow path, check the gray bit in the lock word of the reference's 6270 // holder (`obj`) to decide whether to mark `ref` and update the 6271 // field or not. 6272 // 6273 // if (mr) { // Thread::Current()->GetIsGcMarking() 6274 // // Slow path. 6275 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); 6276 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 6277 // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. 6278 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 6279 // if (is_gray) { 6280 // old_ref = ref; 6281 // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() 6282 // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. 6283 // compareAndSwapObject(obj, field_offset, old_ref, ref); 6284 // } 6285 // } 6286 6287 // Slow path updating the object reference at address `obj + field_offset` 6288 // when the GC is marking. The entrypoint will be loaded by the slow path code. 6289 SlowPathCodeARM64* slow_path = 6290 new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( 6291 instruction, 6292 ref, 6293 obj, 6294 /* offset */ 0u, 6295 /* index */ field_offset, 6296 /* scale_factor */ 0u /* "times 1" */, 6297 needs_null_check, 6298 use_load_acquire, 6299 temp); 6300 AddSlowPath(slow_path); 6301 6302 __ Cbnz(mr, slow_path->GetEntryLabel()); 6303 // Fast path: the GC is not marking: nothing to do (the field is 6304 // up-to-date, and we don't need to load the reference). 6305 __ Bind(slow_path->GetExitLabel()); 6306 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); 6307 } 6308 6309 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, 6310 Location ref, 6311 Register obj, 6312 uint32_t offset, 6313 Location index, 6314 size_t scale_factor, 6315 bool needs_null_check, 6316 bool use_load_acquire) { 6317 DCHECK(obj.IsW()); 6318 DataType::Type type = DataType::Type::kReference; 6319 Register ref_reg = RegisterFrom(ref, type); 6320 6321 // If needed, vixl::EmissionCheckScope guards are used to ensure 6322 // that no pools are emitted between the load (macro) instruction 6323 // and MaybeRecordImplicitNullCheck. 6324 6325 if (index.IsValid()) { 6326 // Load types involving an "index": ArrayGet, 6327 // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject 6328 // intrinsics. 6329 if (use_load_acquire) { 6330 // UnsafeGetObjectVolatile intrinsic case. 
6331 // Register `index` is not an index in an object array, but an 6332 // offset to an object reference field within object `obj`. 6333 DCHECK(instruction->IsInvoke()) << instruction->DebugName(); 6334 DCHECK(instruction->GetLocations()->Intrinsified()); 6335 DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) 6336 << instruction->AsInvoke()->GetIntrinsic(); 6337 DCHECK_EQ(offset, 0u); 6338 DCHECK_EQ(scale_factor, 0u); 6339 DCHECK_EQ(needs_null_check, false); 6340 // /* HeapReference<mirror::Object> */ ref = *(obj + index) 6341 MemOperand field = HeapOperand(obj, XRegisterFrom(index)); 6342 LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); 6343 } else { 6344 // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases. 6345 // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) 6346 if (index.IsConstant()) { 6347 uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); 6348 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 6349 Load(type, ref_reg, HeapOperand(obj, computed_offset)); 6350 if (needs_null_check) { 6351 MaybeRecordImplicitNullCheck(instruction); 6352 } 6353 } else { 6354 UseScratchRegisterScope temps(GetVIXLAssembler()); 6355 Register temp = temps.AcquireW(); 6356 __ Add(temp, obj, offset); 6357 { 6358 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 6359 Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor)); 6360 if (needs_null_check) { 6361 MaybeRecordImplicitNullCheck(instruction); 6362 } 6363 } 6364 } 6365 } 6366 } else { 6367 // /* HeapReference<mirror::Object> */ ref = *(obj + offset) 6368 MemOperand field = HeapOperand(obj, offset); 6369 if (use_load_acquire) { 6370 // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire. 6371 LoadAcquire(instruction, ref_reg, field, needs_null_check); 6372 } else { 6373 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 6374 Load(type, ref_reg, field); 6375 if (needs_null_check) { 6376 MaybeRecordImplicitNullCheck(instruction); 6377 } 6378 } 6379 } 6380 6381 // Object* ref = ref_addr->AsMirrorPtr() 6382 GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); 6383 } 6384 6385 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { 6386 // The following condition is a compile-time one, so it does not have a run-time cost. 6387 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { 6388 // The following condition is a run-time one; it is executed after the 6389 // previous compile-time test, to avoid penalizing non-debug builds. 6390 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { 6391 UseScratchRegisterScope temps(GetVIXLAssembler()); 6392 Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW(); 6393 GetAssembler()->GenerateMarkingRegisterCheck(temp, code); 6394 } 6395 } 6396 } 6397 6398 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, 6399 Location out, 6400 Location ref, 6401 Location obj, 6402 uint32_t offset, 6403 Location index) { 6404 DCHECK(kEmitCompilerReadBarrier); 6405 6406 // Insert a slow path based read barrier *after* the reference load. 6407 // 6408 // If heap poisoning is enabled, the unpoisoning of the loaded 6409 // reference will be carried out by the runtime within the slow 6410 // path. 
6411 // 6412 // Note that `ref` currently does not get unpoisoned (when heap 6413 // poisoning is enabled), which is alright as the `ref` argument is 6414 // not used by the artReadBarrierSlow entry point. 6415 // 6416 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 6417 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) 6418 ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); 6419 AddSlowPath(slow_path); 6420 6421 __ B(slow_path->GetEntryLabel()); 6422 __ Bind(slow_path->GetExitLabel()); 6423 } 6424 6425 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, 6426 Location out, 6427 Location ref, 6428 Location obj, 6429 uint32_t offset, 6430 Location index) { 6431 if (kEmitCompilerReadBarrier) { 6432 // Baker's read barriers shall be handled by the fast path 6433 // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier). 6434 DCHECK(!kUseBakerReadBarrier); 6435 // If heap poisoning is enabled, unpoisoning will be taken care of 6436 // by the runtime within the slow path. 6437 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); 6438 } else if (kPoisonHeapReferences) { 6439 GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); 6440 } 6441 } 6442 6443 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction, 6444 Location out, 6445 Location root) { 6446 DCHECK(kEmitCompilerReadBarrier); 6447 6448 // Insert a slow path based read barrier *after* the GC root load. 6449 // 6450 // Note that GC roots are not affected by heap poisoning, so we do 6451 // not need to do anything special for this here. 6452 SlowPathCodeARM64* slow_path = 6453 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root); 6454 AddSlowPath(slow_path); 6455 6456 __ B(slow_path->GetEntryLabel()); 6457 __ Bind(slow_path->GetExitLabel()); 6458 } 6459 6460 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) { 6461 LocationSummary* locations = 6462 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 6463 locations->SetInAt(0, Location::RequiresRegister()); 6464 locations->SetOut(Location::RequiresRegister()); 6465 } 6466 6467 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) { 6468 LocationSummary* locations = instruction->GetLocations(); 6469 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { 6470 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 6471 instruction->GetIndex(), kArm64PointerSize).SizeValue(); 6472 __ Ldr(XRegisterFrom(locations->Out()), 6473 MemOperand(XRegisterFrom(locations->InAt(0)), method_offset)); 6474 } else { 6475 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 6476 instruction->GetIndex(), kArm64PointerSize)); 6477 __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)), 6478 mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); 6479 __ Ldr(XRegisterFrom(locations->Out()), 6480 MemOperand(XRegisterFrom(locations->Out()), method_offset)); 6481 } 6482 } 6483 6484 static void PatchJitRootUse(uint8_t* code, 6485 const uint8_t* roots_data, 6486 vixl::aarch64::Literal<uint32_t>* literal, 6487 uint64_t index_in_table) { 6488 uint32_t literal_offset = literal->GetOffset(); 6489 uintptr_t address = 6490 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); 6491 uint8_t* data = code + literal_offset; 6492 
reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); 6493 } 6494 6495 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { 6496 for (const auto& entry : jit_string_patches_) { 6497 const StringReference& string_reference = entry.first; 6498 vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; 6499 uint64_t index_in_table = GetJitStringRootIndex(string_reference); 6500 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); 6501 } 6502 for (const auto& entry : jit_class_patches_) { 6503 const TypeReference& type_reference = entry.first; 6504 vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; 6505 uint64_t index_in_table = GetJitClassRootIndex(type_reference); 6506 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); 6507 } 6508 } 6509 6510 #undef __ 6511 #undef QUICK_ENTRY_POINT 6512 6513 } // namespace arm64 6514 } // namespace art 6515