/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using vixl::EmissionCheckScope;

#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif

namespace art {

template<class MirrorType>
class GcRoot;

namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
// jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
// therefore generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
// offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
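// For example (illustrative numbers), a field at offset 0x4004 would be loaded with
// "ADD temp, obj, #0x4000" followed by "LDR wN, [temp, #0x4]", so that the LDR itself always
// uses a small, encodable immediate.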
94 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; 95 96 inline Condition ARM64Condition(IfCondition cond) { 97 switch (cond) { 98 case kCondEQ: return eq; 99 case kCondNE: return ne; 100 case kCondLT: return lt; 101 case kCondLE: return le; 102 case kCondGT: return gt; 103 case kCondGE: return ge; 104 case kCondB: return lo; 105 case kCondBE: return ls; 106 case kCondA: return hi; 107 case kCondAE: return hs; 108 } 109 LOG(FATAL) << "Unreachable"; 110 UNREACHABLE(); 111 } 112 113 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) { 114 // The ARM64 condition codes can express all the necessary branches, see the 115 // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual. 116 // There is no dex instruction or HIR that would need the missing conditions 117 // "equal or unordered" or "not equal". 118 switch (cond) { 119 case kCondEQ: return eq; 120 case kCondNE: return ne /* unordered */; 121 case kCondLT: return gt_bias ? cc : lt /* unordered */; 122 case kCondLE: return gt_bias ? ls : le /* unordered */; 123 case kCondGT: return gt_bias ? hi /* unordered */ : gt; 124 case kCondGE: return gt_bias ? cs /* unordered */ : ge; 125 default: 126 LOG(FATAL) << "UNREACHABLE"; 127 UNREACHABLE(); 128 } 129 } 130 131 Location ARM64ReturnLocation(DataType::Type return_type) { 132 // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the 133 // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`, 134 // but we use the exact registers for clarity. 135 if (return_type == DataType::Type::kFloat32) { 136 return LocationFrom(s0); 137 } else if (return_type == DataType::Type::kFloat64) { 138 return LocationFrom(d0); 139 } else if (return_type == DataType::Type::kInt64) { 140 return LocationFrom(x0); 141 } else if (return_type == DataType::Type::kVoid) { 142 return Location::NoLocation(); 143 } else { 144 return LocationFrom(w0); 145 } 146 } 147 148 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) { 149 return ARM64ReturnLocation(return_type); 150 } 151 152 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { 153 InvokeRuntimeCallingConvention calling_convention; 154 RegisterSet caller_saves = RegisterSet::Empty(); 155 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 156 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), 157 RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), 158 DataType::Type::kReference).GetCode()); 159 return caller_saves; 160 } 161 162 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 163 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT 164 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value() 165 166 // Calculate memory accessing operand for save/restore live registers. 
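// Core registers are saved/restored as one block at `spill_offset`, immediately followed by the
// FP registers. If the resulting load/store pair offsets do not fit the LDP/STP immediate range,
// the helper first materializes a closer base address with an ADD into a scratch register.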
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset,
                                           bool is_save) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
                                         codegen->GetNumberOfCoreRegisters(),
                                         fp_spills,
                                         codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);

  MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate register
    // to compute the base address (the base address of the floating-point register spills).
    Register new_base = temps.AcquireSameSizeAs(base);
    __ Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
    __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
    __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}

void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
218 if (locations->RegisterContainsObject(i)) { 219 locations->SetStackBit(stack_offset / kVRegSize); 220 } 221 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); 222 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); 223 saved_core_stack_offsets_[i] = stack_offset; 224 stack_offset += kXRegSizeInBytes; 225 } 226 227 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); 228 for (uint32_t i : LowToHighBits(fp_spills)) { 229 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); 230 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); 231 saved_fpu_stack_offsets_[i] = stack_offset; 232 stack_offset += kDRegSizeInBytes; 233 } 234 235 SaveRestoreLiveRegistersHelper(codegen, 236 locations, 237 codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true); 238 } 239 240 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { 241 SaveRestoreLiveRegistersHelper(codegen, 242 locations, 243 codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false); 244 } 245 246 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { 247 public: 248 explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {} 249 250 void EmitNativeCode(CodeGenerator* codegen) override { 251 LocationSummary* locations = instruction_->GetLocations(); 252 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 253 254 __ Bind(GetEntryLabel()); 255 if (instruction_->CanThrowIntoCatchBlock()) { 256 // Live registers will be restored in the catch block if caught. 257 SaveLiveRegisters(codegen, instruction_->GetLocations()); 258 } 259 // We're moving two locations to locations that could overlap, so we need a parallel 260 // move resolver. 261 InvokeRuntimeCallingConvention calling_convention; 262 codegen->EmitParallelMoves(locations->InAt(0), 263 LocationFrom(calling_convention.GetRegisterAt(0)), 264 DataType::Type::kInt32, 265 locations->InAt(1), 266 LocationFrom(calling_convention.GetRegisterAt(1)), 267 DataType::Type::kInt32); 268 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() 269 ? 
kQuickThrowStringBounds 270 : kQuickThrowArrayBounds; 271 arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); 272 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); 273 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); 274 } 275 276 bool IsFatal() const override { return true; } 277 278 const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; } 279 280 private: 281 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64); 282 }; 283 284 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { 285 public: 286 explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {} 287 288 void EmitNativeCode(CodeGenerator* codegen) override { 289 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 290 __ Bind(GetEntryLabel()); 291 arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); 292 CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); 293 } 294 295 bool IsFatal() const override { return true; } 296 297 const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; } 298 299 private: 300 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64); 301 }; 302 303 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { 304 public: 305 LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at) 306 : SlowPathCodeARM64(at), cls_(cls) { 307 DCHECK(at->IsLoadClass() || at->IsClinitCheck()); 308 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); 309 } 310 311 void EmitNativeCode(CodeGenerator* codegen) override { 312 LocationSummary* locations = instruction_->GetLocations(); 313 Location out = locations->Out(); 314 const uint32_t dex_pc = instruction_->GetDexPc(); 315 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); 316 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); 317 318 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 319 __ Bind(GetEntryLabel()); 320 SaveLiveRegisters(codegen, locations); 321 322 InvokeRuntimeCallingConvention calling_convention; 323 if (must_resolve_type) { 324 DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile())); 325 dex::TypeIndex type_index = cls_->GetTypeIndex(); 326 __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_); 327 arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); 328 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); 329 // If we also must_do_clinit, the resolved type is now in the correct register. 330 } else { 331 DCHECK(must_do_clinit); 332 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); 333 arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), 334 source, 335 cls_->GetType()); 336 } 337 if (must_do_clinit) { 338 arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); 339 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); 340 } 341 342 // Move the class to the desired location. 
343 if (out.IsValid()) { 344 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); 345 DataType::Type type = instruction_->GetType(); 346 arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); 347 } 348 RestoreLiveRegisters(codegen, locations); 349 __ B(GetExitLabel()); 350 } 351 352 const char* GetDescription() const override { return "LoadClassSlowPathARM64"; } 353 354 private: 355 // The class this slow path will load. 356 HLoadClass* const cls_; 357 358 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64); 359 }; 360 361 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { 362 public: 363 explicit LoadStringSlowPathARM64(HLoadString* instruction) 364 : SlowPathCodeARM64(instruction) {} 365 366 void EmitNativeCode(CodeGenerator* codegen) override { 367 LocationSummary* locations = instruction_->GetLocations(); 368 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); 369 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 370 371 __ Bind(GetEntryLabel()); 372 SaveLiveRegisters(codegen, locations); 373 374 InvokeRuntimeCallingConvention calling_convention; 375 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); 376 __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_); 377 arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); 378 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 379 DataType::Type type = instruction_->GetType(); 380 arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); 381 382 RestoreLiveRegisters(codegen, locations); 383 384 __ B(GetExitLabel()); 385 } 386 387 const char* GetDescription() const override { return "LoadStringSlowPathARM64"; } 388 389 private: 390 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); 391 }; 392 393 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { 394 public: 395 explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {} 396 397 void EmitNativeCode(CodeGenerator* codegen) override { 398 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 399 __ Bind(GetEntryLabel()); 400 if (instruction_->CanThrowIntoCatchBlock()) { 401 // Live registers will be restored in the catch block if caught. 402 SaveLiveRegisters(codegen, instruction_->GetLocations()); 403 } 404 arm64_codegen->InvokeRuntime(kQuickThrowNullPointer, 405 instruction_, 406 instruction_->GetDexPc(), 407 this); 408 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); 409 } 410 411 bool IsFatal() const override { return true; } 412 413 const char* GetDescription() const override { return "NullCheckSlowPathARM64"; } 414 415 private: 416 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64); 417 }; 418 419 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { 420 public: 421 SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor) 422 : SlowPathCodeARM64(instruction), successor_(successor) {} 423 424 void EmitNativeCode(CodeGenerator* codegen) override { 425 LocationSummary* locations = instruction_->GetLocations(); 426 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 427 __ Bind(GetEntryLabel()); 428 SaveLiveRegisters(codegen, locations); // Only saves live 128-bit regs for SIMD. 
429 arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); 430 CheckEntrypointTypes<kQuickTestSuspend, void, void>(); 431 RestoreLiveRegisters(codegen, locations); // Only restores live 128-bit regs for SIMD. 432 if (successor_ == nullptr) { 433 __ B(GetReturnLabel()); 434 } else { 435 __ B(arm64_codegen->GetLabelOf(successor_)); 436 } 437 } 438 439 vixl::aarch64::Label* GetReturnLabel() { 440 DCHECK(successor_ == nullptr); 441 return &return_label_; 442 } 443 444 HBasicBlock* GetSuccessor() const { 445 return successor_; 446 } 447 448 const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; } 449 450 private: 451 // If not null, the block to branch to after the suspend check. 452 HBasicBlock* const successor_; 453 454 // If `successor_` is null, the label to branch to after the suspend check. 455 vixl::aarch64::Label return_label_; 456 457 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64); 458 }; 459 460 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { 461 public: 462 TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal) 463 : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {} 464 465 void EmitNativeCode(CodeGenerator* codegen) override { 466 LocationSummary* locations = instruction_->GetLocations(); 467 468 DCHECK(instruction_->IsCheckCast() 469 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); 470 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 471 uint32_t dex_pc = instruction_->GetDexPc(); 472 473 __ Bind(GetEntryLabel()); 474 475 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { 476 SaveLiveRegisters(codegen, locations); 477 } 478 479 // We're moving two locations to locations that could overlap, so we need a parallel 480 // move resolver. 
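// (For instance, the object reference may already sit in the register that the class argument
// must end up in, and vice versa; EmitParallelMoves resolves such overlaps without clobbering
// either value.)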
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      DataType::Type ret_type = instruction_->GetType();
      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
      arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};

class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};

class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); 569 RestoreLiveRegisters(codegen, locations); 570 __ B(GetExitLabel()); 571 } 572 573 const char* GetDescription() const override { return "ArraySetSlowPathARM64"; } 574 575 private: 576 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); 577 }; 578 579 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { 580 uint32_t num_entries = switch_instr_->GetNumEntries(); 581 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold); 582 583 // We are about to use the assembler to place literals directly. Make sure we have enough 584 // underlying code buffer and we have generated the jump table with right size. 585 EmissionCheckScope scope(codegen->GetVIXLAssembler(), 586 num_entries * sizeof(int32_t), 587 CodeBufferCheckScope::kExactSize); 588 589 __ Bind(&table_start_); 590 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors(); 591 for (uint32_t i = 0; i < num_entries; i++) { 592 vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]); 593 DCHECK(target_label->IsBound()); 594 ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation(); 595 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min()); 596 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max()); 597 Literal<int32_t> literal(jump_offset); 598 __ place(&literal); 599 } 600 } 601 602 // Slow path generating a read barrier for a heap reference. 603 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { 604 public: 605 ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction, 606 Location out, 607 Location ref, 608 Location obj, 609 uint32_t offset, 610 Location index) 611 : SlowPathCodeARM64(instruction), 612 out_(out), 613 ref_(ref), 614 obj_(obj), 615 offset_(offset), 616 index_(index) { 617 DCHECK(kEmitCompilerReadBarrier); 618 // If `obj` is equal to `out` or `ref`, it means the initial object 619 // has been overwritten by (or after) the heap object reference load 620 // to be instrumented, e.g.: 621 // 622 // __ Ldr(out, HeapOperand(out, class_offset); 623 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); 624 // 625 // In that case, we have lost the information about the original 626 // object, and the emitted read barrier cannot work properly. 627 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; 628 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; 629 } 630 631 void EmitNativeCode(CodeGenerator* codegen) override { 632 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 633 LocationSummary* locations = instruction_->GetLocations(); 634 DataType::Type type = DataType::Type::kReference; 635 DCHECK(locations->CanCall()); 636 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); 637 DCHECK(instruction_->IsInstanceFieldGet() || 638 instruction_->IsStaticFieldGet() || 639 instruction_->IsArrayGet() || 640 instruction_->IsInstanceOf() || 641 instruction_->IsCheckCast() || 642 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) 643 << "Unexpected instruction in read barrier for heap reference slow path: " 644 << instruction_->DebugName(); 645 // The read barrier instrumentation of object ArrayGet 646 // instructions does not support the HIntermediateAddress 647 // instruction. 
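// (The slow path needs the reference to the holder object itself, while HIntermediateAddress
// would replace it with an already-offset address, losing the original object.)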
648 DCHECK(!(instruction_->IsArrayGet() && 649 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); 650 651 __ Bind(GetEntryLabel()); 652 653 SaveLiveRegisters(codegen, locations); 654 655 // We may have to change the index's value, but as `index_` is a 656 // constant member (like other "inputs" of this slow path), 657 // introduce a copy of it, `index`. 658 Location index = index_; 659 if (index_.IsValid()) { 660 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. 661 if (instruction_->IsArrayGet()) { 662 // Compute the actual memory offset and store it in `index`. 663 Register index_reg = RegisterFrom(index_, DataType::Type::kInt32); 664 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg())); 665 if (codegen->IsCoreCalleeSaveRegister(index_.reg())) { 666 // We are about to change the value of `index_reg` (see the 667 // calls to vixl::MacroAssembler::Lsl and 668 // vixl::MacroAssembler::Mov below), but it has 669 // not been saved by the previous call to 670 // art::SlowPathCode::SaveLiveRegisters, as it is a 671 // callee-save register -- 672 // art::SlowPathCode::SaveLiveRegisters does not consider 673 // callee-save registers, as it has been designed with the 674 // assumption that callee-save registers are supposed to be 675 // handled by the called function. So, as a callee-save 676 // register, `index_reg` _would_ eventually be saved onto 677 // the stack, but it would be too late: we would have 678 // changed its value earlier. Therefore, we manually save 679 // it here into another freely available register, 680 // `free_reg`, chosen of course among the caller-save 681 // registers (as a callee-save `free_reg` register would 682 // exhibit the same problem). 683 // 684 // Note we could have requested a temporary register from 685 // the register allocator instead; but we prefer not to, as 686 // this is a slow path, and we know we can find a 687 // caller-save register that is available. 688 Register free_reg = FindAvailableCallerSaveRegister(codegen); 689 __ Mov(free_reg.W(), index_reg); 690 index_reg = free_reg; 691 index = LocationFrom(index_reg); 692 } else { 693 // The initial register stored in `index_` has already been 694 // saved in the call to art::SlowPathCode::SaveLiveRegisters 695 // (as it is not a callee-save register), so we can freely 696 // use it. 697 } 698 // Shifting the index value contained in `index_reg` by the scale 699 // factor (2) cannot overflow in practice, as the runtime is 700 // unable to allocate object arrays with a size larger than 701 // 2^26 - 1 (that is, 2^28 - 4 bytes). 702 __ Lsl(index_reg, index_reg, DataType::SizeShift(type)); 703 static_assert( 704 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 705 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 706 __ Add(index_reg, index_reg, Operand(offset_)); 707 } else { 708 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile 709 // intrinsics, `index_` is not shifted by a scale factor of 2 710 // (as in the case of ArrayGet), as it is actually an offset 711 // to an object field within an object. 
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0u);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          type,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          type,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
    }
    arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
    size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return Register(VIXLRegCodeFromART(i), kXRegSize);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
};

// Slow path generating a read barrier for a GC root.
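// A GC root here is a `GcRoot<mirror::Object>` loaded by HLoadClass or HLoadString; the address
// of the root is passed to the ReadBarrierForRootSlow entrypoint, which returns the
// possibly-updated reference.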
790 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { 791 public: 792 ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) 793 : SlowPathCodeARM64(instruction), out_(out), root_(root) { 794 DCHECK(kEmitCompilerReadBarrier); 795 } 796 797 void EmitNativeCode(CodeGenerator* codegen) override { 798 LocationSummary* locations = instruction_->GetLocations(); 799 DataType::Type type = DataType::Type::kReference; 800 DCHECK(locations->CanCall()); 801 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); 802 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) 803 << "Unexpected instruction in read barrier for GC root slow path: " 804 << instruction_->DebugName(); 805 806 __ Bind(GetEntryLabel()); 807 SaveLiveRegisters(codegen, locations); 808 809 InvokeRuntimeCallingConvention calling_convention; 810 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 811 // The argument of the ReadBarrierForRootSlow is not a managed 812 // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`; 813 // thus we need a 64-bit move here, and we cannot use 814 // 815 // arm64_codegen->MoveLocation( 816 // LocationFrom(calling_convention.GetRegisterAt(0)), 817 // root_, 818 // type); 819 // 820 // which would emit a 32-bit move, as `type` is a (32-bit wide) 821 // reference type (`DataType::Type::kReference`). 822 __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); 823 arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, 824 instruction_, 825 instruction_->GetDexPc(), 826 this); 827 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); 828 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); 829 830 RestoreLiveRegisters(codegen, locations); 831 __ B(GetExitLabel()); 832 } 833 834 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; } 835 836 private: 837 const Location out_; 838 const Location root_; 839 840 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); 841 }; 842 843 #undef __ 844 845 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) { 846 Location next_location; 847 if (type == DataType::Type::kVoid) { 848 LOG(FATAL) << "Unreachable type " << type; 849 } 850 851 if (DataType::IsFloatingPointType(type) && 852 (float_index_ < calling_convention.GetNumberOfFpuRegisters())) { 853 next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++)); 854 } else if (!DataType::IsFloatingPointType(type) && 855 (gp_index_ < calling_convention.GetNumberOfRegisters())) { 856 next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++)); 857 } else { 858 size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); 859 next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) 860 : Location::StackSlot(stack_offset); 861 } 862 863 // Space on the stack is reserved for all arguments. 864 stack_index_ += DataType::Is64BitType(type) ? 
2 : 1; 865 return next_location; 866 } 867 868 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const { 869 return LocationFrom(kArtMethodRegister); 870 } 871 872 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, 873 const CompilerOptions& compiler_options, 874 OptimizingCompilerStats* stats) 875 : CodeGenerator(graph, 876 kNumberOfAllocatableRegisters, 877 kNumberOfAllocatableFPRegisters, 878 kNumberOfAllocatableRegisterPairs, 879 callee_saved_core_registers.GetList(), 880 callee_saved_fp_registers.GetList(), 881 compiler_options, 882 stats), 883 block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 884 jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 885 location_builder_(graph, this), 886 instruction_visitor_(graph, this), 887 move_resolver_(graph->GetAllocator(), this), 888 assembler_(graph->GetAllocator(), 889 compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()), 890 uint32_literals_(std::less<uint32_t>(), 891 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 892 uint64_literals_(std::less<uint64_t>(), 893 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 894 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 895 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 896 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 897 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 898 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 899 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 900 boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 901 baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 902 jit_string_patches_(StringReferenceValueComparator(), 903 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 904 jit_class_patches_(TypeReferenceValueComparator(), 905 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 906 jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), 907 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { 908 // Save the link register (containing the return address) to mimic Quick. 909 AddAllocatedRegister(LocationFrom(lr)); 910 } 911 912 #define __ GetVIXLAssembler()-> 913 914 void CodeGeneratorARM64::EmitJumpTables() { 915 for (auto&& jump_table : jump_tables_) { 916 jump_table->EmitTable(this); 917 } 918 } 919 920 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { 921 EmitJumpTables(); 922 923 // Emit JIT baker read barrier slow paths. 924 DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty()); 925 for (auto& entry : jit_baker_read_barrier_slow_paths_) { 926 uint32_t encoded_data = entry.first; 927 vixl::aarch64::Label* slow_path_entry = &entry.second.label; 928 __ Bind(slow_path_entry); 929 CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr); 930 } 931 932 // Ensure we emit the literal pool. 933 __ FinalizeCode(); 934 935 CodeGenerator::Finalize(allocator); 936 937 // Verify Baker read barrier linker patches. 
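// For each recorded patch, decode its custom data and check that the instruction following the
// patched one (or preceding it, for GC root loads) has the expected LDR/LDAR/MOV encoding for
// that barrier kind.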
938 if (kIsDebugBuild) { 939 ArrayRef<const uint8_t> code = allocator->GetMemory(); 940 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { 941 DCHECK(info.label.IsBound()); 942 uint32_t literal_offset = info.label.GetLocation(); 943 DCHECK_ALIGNED(literal_offset, 4u); 944 945 auto GetInsn = [&code](uint32_t offset) { 946 DCHECK_ALIGNED(offset, 4u); 947 return 948 (static_cast<uint32_t>(code[offset + 0]) << 0) + 949 (static_cast<uint32_t>(code[offset + 1]) << 8) + 950 (static_cast<uint32_t>(code[offset + 2]) << 16)+ 951 (static_cast<uint32_t>(code[offset + 3]) << 24); 952 }; 953 954 const uint32_t encoded_data = info.custom_data; 955 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); 956 // Check that the next instruction matches the expected LDR. 957 switch (kind) { 958 case BakerReadBarrierKind::kField: 959 case BakerReadBarrierKind::kAcquire: { 960 DCHECK_GE(code.size() - literal_offset, 8u); 961 uint32_t next_insn = GetInsn(literal_offset + 4u); 962 CheckValidReg(next_insn & 0x1fu); // Check destination register. 963 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); 964 if (kind == BakerReadBarrierKind::kField) { 965 // LDR (immediate) with correct base_reg. 966 CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); 967 } else { 968 DCHECK(kind == BakerReadBarrierKind::kAcquire); 969 // LDAR with correct base_reg. 970 CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5)); 971 } 972 break; 973 } 974 case BakerReadBarrierKind::kArray: { 975 DCHECK_GE(code.size() - literal_offset, 8u); 976 uint32_t next_insn = GetInsn(literal_offset + 4u); 977 // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), 978 // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. 979 CheckValidReg(next_insn & 0x1fu); // Check destination register. 980 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); 981 CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); 982 CheckValidReg((next_insn >> 16) & 0x1f); // Check index register 983 break; 984 } 985 case BakerReadBarrierKind::kGcRoot: { 986 DCHECK_GE(literal_offset, 4u); 987 uint32_t prev_insn = GetInsn(literal_offset - 4u); 988 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); 989 // Usually LDR (immediate) with correct root_reg but 990 // we may have a "MOV marked, old_value" for UnsafeCASObject. 991 if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) { // MOV? 992 CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); // LDR? 993 } 994 break; 995 } 996 default: 997 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); 998 UNREACHABLE(); 999 } 1000 } 1001 } 1002 } 1003 1004 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { 1005 // Note: There are 6 kinds of moves: 1006 // 1. constant -> GPR/FPR (non-cycle) 1007 // 2. constant -> stack (non-cycle) 1008 // 3. GPR/FPR -> GPR/FPR 1009 // 4. GPR/FPR -> stack 1010 // 5. stack -> GPR/FPR 1011 // 6. stack -> stack (non-cycle) 1012 // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5 1013 // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting 1014 // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the 1015 // dependency. 
1016 vixl_temps_.Open(GetVIXLAssembler()); 1017 } 1018 1019 void ParallelMoveResolverARM64::FinishEmitNativeCode() { 1020 vixl_temps_.Close(); 1021 } 1022 1023 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) { 1024 DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister 1025 || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot 1026 || kind == Location::kSIMDStackSlot); 1027 kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot) 1028 ? Location::kFpuRegister 1029 : Location::kRegister; 1030 Location scratch = GetScratchLocation(kind); 1031 if (!scratch.Equals(Location::NoLocation())) { 1032 return scratch; 1033 } 1034 // Allocate from VIXL temp registers. 1035 if (kind == Location::kRegister) { 1036 scratch = LocationFrom(vixl_temps_.AcquireX()); 1037 } else { 1038 DCHECK_EQ(kind, Location::kFpuRegister); 1039 scratch = LocationFrom(codegen_->GetGraph()->HasSIMD() 1040 ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize) 1041 : vixl_temps_.AcquireD()); 1042 } 1043 AddScratchLocation(scratch); 1044 return scratch; 1045 } 1046 1047 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { 1048 if (loc.IsRegister()) { 1049 vixl_temps_.Release(XRegisterFrom(loc)); 1050 } else { 1051 DCHECK(loc.IsFpuRegister()); 1052 vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc)); 1053 } 1054 RemoveScratchLocation(loc); 1055 } 1056 1057 void ParallelMoveResolverARM64::EmitMove(size_t index) { 1058 MoveOperands* move = moves_[index]; 1059 codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid); 1060 } 1061 1062 void CodeGeneratorARM64::GenerateFrameEntry() { 1063 MacroAssembler* masm = GetVIXLAssembler(); 1064 __ Bind(&frame_entry_label_); 1065 1066 if (GetCompilerOptions().CountHotnessInCompiledCode()) { 1067 UseScratchRegisterScope temps(masm); 1068 Register temp = temps.AcquireX(); 1069 __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); 1070 __ Add(temp, temp, 1); 1071 __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); 1072 } 1073 1074 bool do_overflow_check = 1075 FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod(); 1076 if (do_overflow_check) { 1077 UseScratchRegisterScope temps(masm); 1078 Register temp = temps.AcquireX(); 1079 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); 1080 __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64))); 1081 { 1082 // Ensure that between load and RecordPcInfo there are no pools emitted. 1083 ExactAssemblyScope eas(GetVIXLAssembler(), 1084 kInstructionSize, 1085 CodeBufferCheckScope::kExactSize); 1086 __ ldr(wzr, MemOperand(temp, 0)); 1087 RecordPcInfo(nullptr, 0); 1088 } 1089 } 1090 1091 if (!HasEmptyFrame()) { 1092 int frame_size = GetFrameSize(); 1093 // Stack layout: 1094 // sp[frame_size - 8] : lr. 1095 // ... : other preserved core registers. 1096 // ... : other preserved fp registers. 1097 // ... : reserved frame space. 1098 // sp[0] : current method. 1099 1100 // Save the current method if we need it. Note that we do not 1101 // do this in HCurrentMethod, as the instruction might have been removed 1102 // in the SSA graph. 
1103 if (RequiresCurrentMethod()) { 1104 __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); 1105 } else { 1106 __ Claim(frame_size); 1107 } 1108 GetAssembler()->cfi().AdjustCFAOffset(frame_size); 1109 GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(), 1110 frame_size - GetCoreSpillSize()); 1111 GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(), 1112 frame_size - FrameEntrySpillSize()); 1113 1114 if (GetGraph()->HasShouldDeoptimizeFlag()) { 1115 // Initialize should_deoptimize flag to 0. 1116 Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize); 1117 __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); 1118 } 1119 } 1120 1121 MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 1122 } 1123 1124 void CodeGeneratorARM64::GenerateFrameExit() { 1125 GetAssembler()->cfi().RememberState(); 1126 if (!HasEmptyFrame()) { 1127 int frame_size = GetFrameSize(); 1128 GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(), 1129 frame_size - FrameEntrySpillSize()); 1130 GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(), 1131 frame_size - GetCoreSpillSize()); 1132 __ Drop(frame_size); 1133 GetAssembler()->cfi().AdjustCFAOffset(-frame_size); 1134 } 1135 __ Ret(); 1136 GetAssembler()->cfi().RestoreState(); 1137 GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); 1138 } 1139 1140 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const { 1141 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0)); 1142 return CPURegList(CPURegister::kRegister, kXRegSize, 1143 core_spill_mask_); 1144 } 1145 1146 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const { 1147 DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_, 1148 GetNumberOfFloatingPointRegisters())); 1149 return CPURegList(CPURegister::kFPRegister, kDRegSize, 1150 fpu_spill_mask_); 1151 } 1152 1153 void CodeGeneratorARM64::Bind(HBasicBlock* block) { 1154 __ Bind(GetLabelOf(block)); 1155 } 1156 1157 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) { 1158 DCHECK(location.IsRegister()); 1159 __ Mov(RegisterFrom(location, DataType::Type::kInt32), value); 1160 } 1161 1162 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) { 1163 if (location.IsRegister()) { 1164 locations->AddTemp(location); 1165 } else { 1166 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; 1167 } 1168 } 1169 1170 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) { 1171 UseScratchRegisterScope temps(GetVIXLAssembler()); 1172 Register card = temps.AcquireX(); 1173 Register temp = temps.AcquireW(); // Index within the CardTable - 32bit. 1174 vixl::aarch64::Label done; 1175 if (value_can_be_null) { 1176 __ Cbz(value, &done); 1177 } 1178 // Load the address of the card table into `card`. 1179 __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value())); 1180 // Calculate the offset (in the card table) of the card corresponding to 1181 // `object`. 1182 __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); 1183 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the 1184 // `object`'s card. 1185 // 1186 // Register `card` contains the address of the card table. 
Note that the card 1187 // table's base is biased during its creation so that it always starts at an 1188 // address whose least-significant byte is equal to `kCardDirty` (see 1189 // art::gc::accounting::CardTable::Create). Therefore the STRB instruction 1190 // below writes the `kCardDirty` (byte) value into the `object`'s card 1191 // (located at `card + object >> kCardShift`). 1192 // 1193 // This dual use of the value in register `card` (1. to calculate the location 1194 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load 1195 // (no need to explicitly load `kCardDirty` as an immediate value). 1196 __ Strb(card, MemOperand(card, temp.X())); 1197 if (value_can_be_null) { 1198 __ Bind(&done); 1199 } 1200 } 1201 1202 void CodeGeneratorARM64::SetupBlockedRegisters() const { 1203 // Blocked core registers: 1204 // lr : Runtime reserved. 1205 // tr : Runtime reserved. 1206 // mr : Runtime reserved. 1207 // ip1 : VIXL core temp. 1208 // ip0 : VIXL core temp. 1209 // x18 : Platform register. 1210 // 1211 // Blocked fp registers: 1212 // d31 : VIXL fp temp. 1213 CPURegList reserved_core_registers = vixl_reserved_core_registers; 1214 reserved_core_registers.Combine(runtime_reserved_core_registers); 1215 while (!reserved_core_registers.IsEmpty()) { 1216 blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true; 1217 } 1218 blocked_core_registers_[X18] = true; 1219 1220 CPURegList reserved_fp_registers = vixl_reserved_fp_registers; 1221 while (!reserved_fp_registers.IsEmpty()) { 1222 blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true; 1223 } 1224 1225 if (GetGraph()->IsDebuggable()) { 1226 // Stubs do not save callee-save floating point registers. If the graph 1227 // is debuggable, we need to deal with these registers differently. For 1228 // now, just block them. 
1229 CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers; 1230 while (!reserved_fp_registers_debuggable.IsEmpty()) { 1231 blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true; 1232 } 1233 } 1234 } 1235 1236 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { 1237 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); 1238 __ Str(reg, MemOperand(sp, stack_index)); 1239 return kArm64WordSize; 1240 } 1241 1242 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { 1243 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); 1244 __ Ldr(reg, MemOperand(sp, stack_index)); 1245 return kArm64WordSize; 1246 } 1247 1248 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 1249 FPRegister reg = FPRegister(reg_id, kDRegSize); 1250 __ Str(reg, MemOperand(sp, stack_index)); 1251 return kArm64WordSize; 1252 } 1253 1254 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 1255 FPRegister reg = FPRegister(reg_id, kDRegSize); 1256 __ Ldr(reg, MemOperand(sp, stack_index)); 1257 return kArm64WordSize; 1258 } 1259 1260 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const { 1261 stream << XRegister(reg); 1262 } 1263 1264 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { 1265 stream << DRegister(reg); 1266 } 1267 1268 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const { 1269 return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures(); 1270 } 1271 1272 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { 1273 if (constant->IsIntConstant()) { 1274 __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); 1275 } else if (constant->IsLongConstant()) { 1276 __ Mov(Register(destination), constant->AsLongConstant()->GetValue()); 1277 } else if (constant->IsNullConstant()) { 1278 __ Mov(Register(destination), 0); 1279 } else if (constant->IsFloatConstant()) { 1280 __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue()); 1281 } else { 1282 DCHECK(constant->IsDoubleConstant()); 1283 __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue()); 1284 } 1285 } 1286 1287 1288 static bool CoherentConstantAndType(Location constant, DataType::Type type) { 1289 DCHECK(constant.IsConstant()); 1290 HConstant* cst = constant.GetConstant(); 1291 return (cst->IsIntConstant() && type == DataType::Type::kInt32) || 1292 // Null is mapped to a core W register, which we associate with kPrimInt. 1293 (cst->IsNullConstant() && type == DataType::Type::kInt32) || 1294 (cst->IsLongConstant() && type == DataType::Type::kInt64) || 1295 (cst->IsFloatConstant() && type == DataType::Type::kFloat32) || 1296 (cst->IsDoubleConstant() && type == DataType::Type::kFloat64); 1297 } 1298 1299 // Allocate a scratch register from the VIXL pool, querying first 1300 // the floating-point register pool, and then the core register 1301 // pool. This is essentially a reimplementation of 1302 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize 1303 // using a different allocation strategy. 1304 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm, 1305 vixl::aarch64::UseScratchRegisterScope* temps, 1306 int size_in_bits) { 1307 return masm->GetScratchFPRegisterList()->IsEmpty() 1308 ? 
CPURegister(temps->AcquireRegisterOfSize(size_in_bits)) 1309 : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits)); 1310 } 1311 1312 void CodeGeneratorARM64::MoveLocation(Location destination, 1313 Location source, 1314 DataType::Type dst_type) { 1315 if (source.Equals(destination)) { 1316 return; 1317 } 1318 1319 // A valid move can always be inferred from the destination and source 1320 // locations. When moving from and to a register, the argument type can be 1321 // used to generate 32bit instead of 64bit moves. In debug mode we also 1322 // check the coherency of the locations and the type. 1323 bool unspecified_type = (dst_type == DataType::Type::kVoid); 1324 1325 if (destination.IsRegister() || destination.IsFpuRegister()) { 1326 if (unspecified_type) { 1327 HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; 1328 if (source.IsStackSlot() || 1329 (src_cst != nullptr && (src_cst->IsIntConstant() 1330 || src_cst->IsFloatConstant() 1331 || src_cst->IsNullConstant()))) { 1332 // For stack slots and 32bit constants, a 32bit type is appropriate. 1333 dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32; 1334 } else { 1335 // If the source is a double stack slot or a 64bit constant, a 64bit 1336 // type is appropriate. Else the source is a register, and since the 1337 // type has not been specified, we choose a 64bit type to force a 64bit 1338 // move. 1339 dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64; 1340 } 1341 } 1342 DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) || 1343 (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type))); 1344 CPURegister dst = CPURegisterFrom(destination, dst_type); 1345 if (source.IsStackSlot() || source.IsDoubleStackSlot()) { 1346 DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot()); 1347 __ Ldr(dst, StackOperandFrom(source)); 1348 } else if (source.IsSIMDStackSlot()) { 1349 __ Ldr(QRegisterFrom(destination), StackOperandFrom(source)); 1350 } else if (source.IsConstant()) { 1351 DCHECK(CoherentConstantAndType(source, dst_type)); 1352 MoveConstant(dst, source.GetConstant()); 1353 } else if (source.IsRegister()) { 1354 if (destination.IsRegister()) { 1355 __ Mov(Register(dst), RegisterFrom(source, dst_type)); 1356 } else { 1357 DCHECK(destination.IsFpuRegister()); 1358 DataType::Type source_type = DataType::Is64BitType(dst_type) 1359 ? DataType::Type::kInt64 1360 : DataType::Type::kInt32; 1361 __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type)); 1362 } 1363 } else { 1364 DCHECK(source.IsFpuRegister()); 1365 if (destination.IsRegister()) { 1366 DataType::Type source_type = DataType::Is64BitType(dst_type) 1367 ?
DataType::Type::kFloat64 1368 : DataType::Type::kFloat32; 1369 __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type)); 1370 } else { 1371 DCHECK(destination.IsFpuRegister()); 1372 if (GetGraph()->HasSIMD()) { 1373 __ Mov(QRegisterFrom(destination), QRegisterFrom(source)); 1374 } else { 1375 __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type)); 1376 } 1377 } 1378 } 1379 } else if (destination.IsSIMDStackSlot()) { 1380 if (source.IsFpuRegister()) { 1381 __ Str(QRegisterFrom(source), StackOperandFrom(destination)); 1382 } else { 1383 DCHECK(source.IsSIMDStackSlot()); 1384 UseScratchRegisterScope temps(GetVIXLAssembler()); 1385 if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) { 1386 Register temp = temps.AcquireX(); 1387 __ Ldr(temp, MemOperand(sp, source.GetStackIndex())); 1388 __ Str(temp, MemOperand(sp, destination.GetStackIndex())); 1389 __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize)); 1390 __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize)); 1391 } else { 1392 FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize); 1393 __ Ldr(temp, StackOperandFrom(source)); 1394 __ Str(temp, StackOperandFrom(destination)); 1395 } 1396 } 1397 } else { // The destination is not a register. It must be a stack slot. 1398 DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); 1399 if (source.IsRegister() || source.IsFpuRegister()) { 1400 if (unspecified_type) { 1401 if (source.IsRegister()) { 1402 dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; 1403 } else { 1404 dst_type = 1405 destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64; 1406 } 1407 } 1408 DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) && 1409 (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type))); 1410 __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination)); 1411 } else if (source.IsConstant()) { 1412 DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type)) 1413 << source << " " << dst_type; 1414 UseScratchRegisterScope temps(GetVIXLAssembler()); 1415 HConstant* src_cst = source.GetConstant(); 1416 CPURegister temp; 1417 if (src_cst->IsZeroBitPattern()) { 1418 temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) 1419 ? Register(xzr) 1420 : Register(wzr); 1421 } else { 1422 if (src_cst->IsIntConstant()) { 1423 temp = temps.AcquireW(); 1424 } else if (src_cst->IsLongConstant()) { 1425 temp = temps.AcquireX(); 1426 } else if (src_cst->IsFloatConstant()) { 1427 temp = temps.AcquireS(); 1428 } else { 1429 DCHECK(src_cst->IsDoubleConstant()); 1430 temp = temps.AcquireD(); 1431 } 1432 MoveConstant(temp, src_cst); 1433 } 1434 __ Str(temp, StackOperandFrom(destination)); 1435 } else { 1436 DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); 1437 DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot()); 1438 UseScratchRegisterScope temps(GetVIXLAssembler()); 1439 // Use any scratch register (a core or a floating-point one) 1440 // from VIXL scratch register pools as a temporary. 1441 // 1442 // We used to only use the FP scratch register pool, but in some 1443 // rare cases the only register from this pool (D31) would 1444 // already be used (e.g. within a ParallelMove instruction, when 1445 // a move is blocked by another move requiring a scratch FP 1446 // register, which would reserve D31).
To prevent this issue, we 1447 // ask for a scratch register of any type (core or FP). 1448 // 1449 // Also, we start by asking for a FP scratch register first, as the 1450 // demand of scratch core registers is higher. This is why we 1451 // use AcquireFPOrCoreCPURegisterOfSize instead of 1452 // UseScratchRegisterScope::AcquireCPURegisterOfSize, which 1453 // allocates core scratch registers first. 1454 CPURegister temp = AcquireFPOrCoreCPURegisterOfSize( 1455 GetVIXLAssembler(), 1456 &temps, 1457 (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize)); 1458 __ Ldr(temp, StackOperandFrom(source)); 1459 __ Str(temp, StackOperandFrom(destination)); 1460 } 1461 } 1462 } 1463 1464 void CodeGeneratorARM64::Load(DataType::Type type, 1465 CPURegister dst, 1466 const MemOperand& src) { 1467 switch (type) { 1468 case DataType::Type::kBool: 1469 case DataType::Type::kUint8: 1470 __ Ldrb(Register(dst), src); 1471 break; 1472 case DataType::Type::kInt8: 1473 __ Ldrsb(Register(dst), src); 1474 break; 1475 case DataType::Type::kUint16: 1476 __ Ldrh(Register(dst), src); 1477 break; 1478 case DataType::Type::kInt16: 1479 __ Ldrsh(Register(dst), src); 1480 break; 1481 case DataType::Type::kInt32: 1482 case DataType::Type::kReference: 1483 case DataType::Type::kInt64: 1484 case DataType::Type::kFloat32: 1485 case DataType::Type::kFloat64: 1486 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); 1487 __ Ldr(dst, src); 1488 break; 1489 case DataType::Type::kUint32: 1490 case DataType::Type::kUint64: 1491 case DataType::Type::kVoid: 1492 LOG(FATAL) << "Unreachable type " << type; 1493 } 1494 } 1495 1496 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, 1497 CPURegister dst, 1498 const MemOperand& src, 1499 bool needs_null_check) { 1500 MacroAssembler* masm = GetVIXLAssembler(); 1501 UseScratchRegisterScope temps(masm); 1502 Register temp_base = temps.AcquireX(); 1503 DataType::Type type = instruction->GetType(); 1504 1505 DCHECK(!src.IsPreIndex()); 1506 DCHECK(!src.IsPostIndex()); 1507 1508 // TODO(vixl): Let the MacroAssembler handle MemOperand. 1509 __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src)); 1510 { 1511 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
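// As a hedged sketch (registers are illustrative; x16 stands for the scratch base
// computed just above), a volatile 32-bit field load at an assumed offset 0x10 comes
// out roughly as:
//   add  x16, x0, #0x10
//   ldar w1, [x16]   // the pc right after this is what gets recorded for the
//                    // implicit null check, when one is requested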
1512 MemOperand base = MemOperand(temp_base); 1513 switch (type) { 1514 case DataType::Type::kBool: 1515 case DataType::Type::kUint8: 1516 case DataType::Type::kInt8: 1517 { 1518 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1519 __ ldarb(Register(dst), base); 1520 if (needs_null_check) { 1521 MaybeRecordImplicitNullCheck(instruction); 1522 } 1523 } 1524 if (type == DataType::Type::kInt8) { 1525 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte); 1526 } 1527 break; 1528 case DataType::Type::kUint16: 1529 case DataType::Type::kInt16: 1530 { 1531 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1532 __ ldarh(Register(dst), base); 1533 if (needs_null_check) { 1534 MaybeRecordImplicitNullCheck(instruction); 1535 } 1536 } 1537 if (type == DataType::Type::kInt16) { 1538 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte); 1539 } 1540 break; 1541 case DataType::Type::kInt32: 1542 case DataType::Type::kReference: 1543 case DataType::Type::kInt64: 1544 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); 1545 { 1546 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1547 __ ldar(Register(dst), base); 1548 if (needs_null_check) { 1549 MaybeRecordImplicitNullCheck(instruction); 1550 } 1551 } 1552 break; 1553 case DataType::Type::kFloat32: 1554 case DataType::Type::kFloat64: { 1555 DCHECK(dst.IsFPRegister()); 1556 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); 1557 1558 Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); 1559 { 1560 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1561 __ ldar(temp, base); 1562 if (needs_null_check) { 1563 MaybeRecordImplicitNullCheck(instruction); 1564 } 1565 } 1566 __ Fmov(FPRegister(dst), temp); 1567 break; 1568 } 1569 case DataType::Type::kUint32: 1570 case DataType::Type::kUint64: 1571 case DataType::Type::kVoid: 1572 LOG(FATAL) << "Unreachable type " << type; 1573 } 1574 } 1575 } 1576 1577 void CodeGeneratorARM64::Store(DataType::Type type, 1578 CPURegister src, 1579 const MemOperand& dst) { 1580 switch (type) { 1581 case DataType::Type::kBool: 1582 case DataType::Type::kUint8: 1583 case DataType::Type::kInt8: 1584 __ Strb(Register(src), dst); 1585 break; 1586 case DataType::Type::kUint16: 1587 case DataType::Type::kInt16: 1588 __ Strh(Register(src), dst); 1589 break; 1590 case DataType::Type::kInt32: 1591 case DataType::Type::kReference: 1592 case DataType::Type::kInt64: 1593 case DataType::Type::kFloat32: 1594 case DataType::Type::kFloat64: 1595 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 1596 __ Str(src, dst); 1597 break; 1598 case DataType::Type::kUint32: 1599 case DataType::Type::kUint64: 1600 case DataType::Type::kVoid: 1601 LOG(FATAL) << "Unreachable type " << type; 1602 } 1603 } 1604 1605 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, 1606 DataType::Type type, 1607 CPURegister src, 1608 const MemOperand& dst, 1609 bool needs_null_check) { 1610 MacroAssembler* masm = GetVIXLAssembler(); 1611 UseScratchRegisterScope temps(GetVIXLAssembler()); 1612 Register temp_base = temps.AcquireX(); 1613 1614 DCHECK(!dst.IsPreIndex()); 1615 DCHECK(!dst.IsPostIndex()); 1616 1617 // TODO(vixl): Let the MacroAssembler handle this. 
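// Rough sketch of the release-store path below for an assumed volatile float field at
// offset 0x10 (registers are illustrative; STLR has no FP form, hence the FMOV through
// a core scratch register):
//   add  x16, x1, #0x10
//   fmov w17, s0
//   stlr w17, [x16]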
1618 Operand op = OperandFromMemOperand(dst); 1619 __ Add(temp_base, dst.GetBaseRegister(), op); 1620 MemOperand base = MemOperand(temp_base); 1621 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 1622 switch (type) { 1623 case DataType::Type::kBool: 1624 case DataType::Type::kUint8: 1625 case DataType::Type::kInt8: 1626 { 1627 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1628 __ stlrb(Register(src), base); 1629 if (needs_null_check) { 1630 MaybeRecordImplicitNullCheck(instruction); 1631 } 1632 } 1633 break; 1634 case DataType::Type::kUint16: 1635 case DataType::Type::kInt16: 1636 { 1637 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1638 __ stlrh(Register(src), base); 1639 if (needs_null_check) { 1640 MaybeRecordImplicitNullCheck(instruction); 1641 } 1642 } 1643 break; 1644 case DataType::Type::kInt32: 1645 case DataType::Type::kReference: 1646 case DataType::Type::kInt64: 1647 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 1648 { 1649 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1650 __ stlr(Register(src), base); 1651 if (needs_null_check) { 1652 MaybeRecordImplicitNullCheck(instruction); 1653 } 1654 } 1655 break; 1656 case DataType::Type::kFloat32: 1657 case DataType::Type::kFloat64: { 1658 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 1659 Register temp_src; 1660 if (src.IsZero()) { 1661 // The zero register is used to avoid synthesizing zero constants. 1662 temp_src = Register(src); 1663 } else { 1664 DCHECK(src.IsFPRegister()); 1665 temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); 1666 __ Fmov(temp_src, FPRegister(src)); 1667 } 1668 { 1669 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1670 __ stlr(temp_src, base); 1671 if (needs_null_check) { 1672 MaybeRecordImplicitNullCheck(instruction); 1673 } 1674 } 1675 break; 1676 } 1677 case DataType::Type::kUint32: 1678 case DataType::Type::kUint64: 1679 case DataType::Type::kVoid: 1680 LOG(FATAL) << "Unreachable type " << type; 1681 } 1682 } 1683 1684 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint, 1685 HInstruction* instruction, 1686 uint32_t dex_pc, 1687 SlowPathCode* slow_path) { 1688 ValidateInvokeRuntime(entrypoint, instruction, slow_path); 1689 1690 __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value())); 1691 { 1692 // Ensure the pc position is recorded immediately after the `blr` instruction. 
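// Sketch of the resulting call sequence (the entrypoint offset is whatever
// GetThreadOffset<kArm64PointerSize>() returned above; `tr` is the runtime thread
// register), with the stack map recorded right after the blr when the entrypoint
// requires one:
//   ldr lr, [tr, #<entrypoint offset>]
//   blr lr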
1693 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 1694 __ blr(lr); 1695 if (EntrypointRequiresStackMap(entrypoint)) { 1696 RecordPcInfo(instruction, dex_pc, slow_path); 1697 } 1698 } 1699 } 1700 1701 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, 1702 HInstruction* instruction, 1703 SlowPathCode* slow_path) { 1704 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); 1705 __ Ldr(lr, MemOperand(tr, entry_point_offset)); 1706 __ Blr(lr); 1707 } 1708 1709 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, 1710 Register class_reg) { 1711 UseScratchRegisterScope temps(GetVIXLAssembler()); 1712 Register temp = temps.AcquireW(); 1713 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 1714 const size_t status_byte_offset = 1715 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 1716 constexpr uint32_t shifted_initialized_value = 1717 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); 1718 1719 // Even if the initialized flag is set, we need to ensure consistent memory ordering. 1720 // TODO(vixl): Let the MacroAssembler handle MemOperand. 1721 __ Add(temp, class_reg, status_byte_offset); 1722 __ Ldarb(temp, HeapOperand(temp)); 1723 __ Cmp(temp, shifted_initialized_value); 1724 __ B(lo, slow_path->GetEntryLabel()); 1725 __ Bind(slow_path->GetExitLabel()); 1726 } 1727 1728 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( 1729 HTypeCheckInstruction* check, vixl::aarch64::Register temp) { 1730 uint32_t path_to_root = check->GetBitstringPathToRoot(); 1731 uint32_t mask = check->GetBitstringMask(); 1732 DCHECK(IsPowerOfTwo(mask + 1)); 1733 size_t mask_bits = WhichPowerOf2(mask + 1); 1734 1735 if (mask_bits == 16u) { 1736 // Load only the bitstring part of the status word. 1737 __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); 1738 } else { 1739 // /* uint32_t */ temp = temp->status_ 1740 __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); 1741 // Extract the bitstring bits. 1742 __ Ubfx(temp, temp, 0, mask_bits); 1743 } 1744 // Compare the bitstring bits to `path_to_root`. 
1745 __ Cmp(temp, path_to_root); 1746 } 1747 1748 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { 1749 BarrierType type = BarrierAll; 1750 1751 switch (kind) { 1752 case MemBarrierKind::kAnyAny: 1753 case MemBarrierKind::kAnyStore: { 1754 type = BarrierAll; 1755 break; 1756 } 1757 case MemBarrierKind::kLoadAny: { 1758 type = BarrierReads; 1759 break; 1760 } 1761 case MemBarrierKind::kStoreStore: { 1762 type = BarrierWrites; 1763 break; 1764 } 1765 default: 1766 LOG(FATAL) << "Unexpected memory barrier " << kind; 1767 } 1768 __ Dmb(InnerShareable, type); 1769 } 1770 1771 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, 1772 HBasicBlock* successor) { 1773 SuspendCheckSlowPathARM64* slow_path = 1774 down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); 1775 if (slow_path == nullptr) { 1776 slow_path = 1777 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor); 1778 instruction->SetSlowPath(slow_path); 1779 codegen_->AddSlowPath(slow_path); 1780 if (successor != nullptr) { 1781 DCHECK(successor->IsLoopHeader()); 1782 } 1783 } else { 1784 DCHECK_EQ(slow_path->GetSuccessor(), successor); 1785 } 1786 1787 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); 1788 Register temp = temps.AcquireW(); 1789 1790 __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue())); 1791 if (successor == nullptr) { 1792 __ Cbnz(temp, slow_path->GetEntryLabel()); 1793 __ Bind(slow_path->GetReturnLabel()); 1794 } else { 1795 __ Cbz(temp, codegen_->GetLabelOf(successor)); 1796 __ B(slow_path->GetEntryLabel()); 1797 // slow_path will return to GetLabelOf(successor). 1798 } 1799 } 1800 1801 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, 1802 CodeGeneratorARM64* codegen) 1803 : InstructionCodeGenerator(graph, codegen), 1804 assembler_(codegen->GetAssembler()), 1805 codegen_(codegen) {} 1806 1807 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { 1808 DCHECK_EQ(instr->InputCount(), 2U); 1809 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 1810 DataType::Type type = instr->GetResultType(); 1811 switch (type) { 1812 case DataType::Type::kInt32: 1813 case DataType::Type::kInt64: 1814 locations->SetInAt(0, Location::RequiresRegister()); 1815 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr)); 1816 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 1817 break; 1818 1819 case DataType::Type::kFloat32: 1820 case DataType::Type::kFloat64: 1821 locations->SetInAt(0, Location::RequiresFpuRegister()); 1822 locations->SetInAt(1, Location::RequiresFpuRegister()); 1823 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 1824 break; 1825 1826 default: 1827 LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type; 1828 } 1829 } 1830 1831 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, 1832 const FieldInfo& field_info) { 1833 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 1834 1835 bool object_field_get_with_read_barrier = 1836 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 1837 LocationSummary* locations = 1838 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 1839 object_field_get_with_read_barrier 1840 ? 
LocationSummary::kCallOnSlowPath 1841 : LocationSummary::kNoCall); 1842 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 1843 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 1844 // We need a temporary register for the read barrier load in 1845 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() 1846 // only if the field is volatile or the offset is too big. 1847 if (field_info.IsVolatile() || 1848 field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { 1849 locations->AddTemp(FixedTempLocation()); 1850 } 1851 } 1852 locations->SetInAt(0, Location::RequiresRegister()); 1853 if (DataType::IsFloatingPointType(instruction->GetType())) { 1854 locations->SetOut(Location::RequiresFpuRegister()); 1855 } else { 1856 // The output overlaps for an object field get when read barriers 1857 // are enabled: we do not want the load to overwrite the object's 1858 // location, as we need it to emit the read barrier. 1859 locations->SetOut( 1860 Location::RequiresRegister(), 1861 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); 1862 } 1863 } 1864 1865 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, 1866 const FieldInfo& field_info) { 1867 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 1868 LocationSummary* locations = instruction->GetLocations(); 1869 Location base_loc = locations->InAt(0); 1870 Location out = locations->Out(); 1871 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 1872 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); 1873 DataType::Type load_type = instruction->GetType(); 1874 MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); 1875 1876 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && 1877 load_type == DataType::Type::kReference) { 1878 // Object FieldGet with Baker's read barrier case. 1879 // /* HeapReference<Object> */ out = *(base + offset) 1880 Register base = RegisterFrom(base_loc, DataType::Type::kReference); 1881 Location maybe_temp = 1882 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); 1883 // Note that potential implicit null checks are handled in this 1884 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call. 1885 codegen_->GenerateFieldLoadWithBakerReadBarrier( 1886 instruction, 1887 out, 1888 base, 1889 offset, 1890 maybe_temp, 1891 /* needs_null_check= */ true, 1892 field_info.IsVolatile()); 1893 } else { 1894 // General case. 1895 if (field_info.IsVolatile()) { 1896 // Note that a potential implicit null check is handled in this 1897 // CodeGeneratorARM64::LoadAcquire call. 1898 // NB: LoadAcquire will record the pc info if needed. 1899 codegen_->LoadAcquire( 1900 instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true); 1901 } else { 1902 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 1903 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 1904 codegen_->Load(load_type, OutputCPURegister(instruction), field); 1905 codegen_->MaybeRecordImplicitNullCheck(instruction); 1906 } 1907 if (load_type == DataType::Type::kReference) { 1908 // If read barriers are enabled, emit read barriers other than 1909 // Baker's using a slow path (and also unpoison the loaded 1910 // reference, if heap poisoning is enabled). 
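// As a hedged illustration (registers and the 0x10 offset are made up), the fast path
// above for a non-volatile reference field is roughly just
//   ldr w0, [x1, #0x10]   // pc recorded for the implicit null check
// and the call below only adds slow-path work in the non-Baker read barrier
// configuration.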
1911 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); 1912 } 1913 } 1914 } 1915 1916 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { 1917 LocationSummary* locations = 1918 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 1919 locations->SetInAt(0, Location::RequiresRegister()); 1920 if (IsConstantZeroBitPattern(instruction->InputAt(1))) { 1921 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 1922 } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { 1923 locations->SetInAt(1, Location::RequiresFpuRegister()); 1924 } else { 1925 locations->SetInAt(1, Location::RequiresRegister()); 1926 } 1927 } 1928 1929 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, 1930 const FieldInfo& field_info, 1931 bool value_can_be_null) { 1932 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 1933 1934 Register obj = InputRegisterAt(instruction, 0); 1935 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1); 1936 CPURegister source = value; 1937 Offset offset = field_info.GetFieldOffset(); 1938 DataType::Type field_type = field_info.GetFieldType(); 1939 1940 { 1941 // We use a block to end the scratch scope before the write barrier, thus 1942 // freeing the temporary registers so they can be used in `MarkGCCard`. 1943 UseScratchRegisterScope temps(GetVIXLAssembler()); 1944 1945 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 1946 DCHECK(value.IsW()); 1947 Register temp = temps.AcquireW(); 1948 __ Mov(temp, value.W()); 1949 GetAssembler()->PoisonHeapReference(temp.W()); 1950 source = temp; 1951 } 1952 1953 if (field_info.IsVolatile()) { 1954 codegen_->StoreRelease( 1955 instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true); 1956 } else { 1957 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 1958 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 1959 codegen_->Store(field_type, source, HeapOperand(obj, offset)); 1960 codegen_->MaybeRecordImplicitNullCheck(instruction); 1961 } 1962 } 1963 1964 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 1965 codegen_->MarkGCCard(obj, Register(value), value_can_be_null); 1966 } 1967 } 1968 1969 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { 1970 DataType::Type type = instr->GetType(); 1971 1972 switch (type) { 1973 case DataType::Type::kInt32: 1974 case DataType::Type::kInt64: { 1975 Register dst = OutputRegister(instr); 1976 Register lhs = InputRegisterAt(instr, 0); 1977 Operand rhs = InputOperandAt(instr, 1); 1978 if (instr->IsAdd()) { 1979 __ Add(dst, lhs, rhs); 1980 } else if (instr->IsAnd()) { 1981 __ And(dst, lhs, rhs); 1982 } else if (instr->IsOr()) { 1983 __ Orr(dst, lhs, rhs); 1984 } else if (instr->IsSub()) { 1985 __ Sub(dst, lhs, rhs); 1986 } else if (instr->IsRor()) { 1987 if (rhs.IsImmediate()) { 1988 uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1); 1989 __ Ror(dst, lhs, shift); 1990 } else { 1991 // Ensure shift distance is in the same size register as the result. If 1992 // we are rotating a long and the shift comes in a w register originally, 1993 // we don't need to sxtw for use as an x since the shift distances are 1994 // all & reg_bits - 1. 
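// E.g. a 64-bit rotate whose distance arrived in a W register still lowers to just
//   ror x0, x1, x2
// (illustrative registers): RORV only consumes the shift amount modulo the register
// size, so no sxtw of the distance is needed.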
1995 __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type)); 1996 } 1997 } else if (instr->IsMin() || instr->IsMax()) { 1998 __ Cmp(lhs, rhs); 1999 __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt); 2000 } else { 2001 DCHECK(instr->IsXor()); 2002 __ Eor(dst, lhs, rhs); 2003 } 2004 break; 2005 } 2006 case DataType::Type::kFloat32: 2007 case DataType::Type::kFloat64: { 2008 FPRegister dst = OutputFPRegister(instr); 2009 FPRegister lhs = InputFPRegisterAt(instr, 0); 2010 FPRegister rhs = InputFPRegisterAt(instr, 1); 2011 if (instr->IsAdd()) { 2012 __ Fadd(dst, lhs, rhs); 2013 } else if (instr->IsSub()) { 2014 __ Fsub(dst, lhs, rhs); 2015 } else if (instr->IsMin()) { 2016 __ Fmin(dst, lhs, rhs); 2017 } else if (instr->IsMax()) { 2018 __ Fmax(dst, lhs, rhs); 2019 } else { 2020 LOG(FATAL) << "Unexpected floating-point binary operation"; 2021 } 2022 break; 2023 } 2024 default: 2025 LOG(FATAL) << "Unexpected binary operation type " << type; 2026 } 2027 } 2028 2029 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) { 2030 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); 2031 2032 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 2033 DataType::Type type = instr->GetResultType(); 2034 switch (type) { 2035 case DataType::Type::kInt32: 2036 case DataType::Type::kInt64: { 2037 locations->SetInAt(0, Location::RequiresRegister()); 2038 locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); 2039 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2040 break; 2041 } 2042 default: 2043 LOG(FATAL) << "Unexpected shift type " << type; 2044 } 2045 } 2046 2047 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) { 2048 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); 2049 2050 DataType::Type type = instr->GetType(); 2051 switch (type) { 2052 case DataType::Type::kInt32: 2053 case DataType::Type::kInt64: { 2054 Register dst = OutputRegister(instr); 2055 Register lhs = InputRegisterAt(instr, 0); 2056 Operand rhs = InputOperandAt(instr, 1); 2057 if (rhs.IsImmediate()) { 2058 uint32_t shift_value = rhs.GetImmediate() & 2059 (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance); 2060 if (instr->IsShl()) { 2061 __ Lsl(dst, lhs, shift_value); 2062 } else if (instr->IsShr()) { 2063 __ Asr(dst, lhs, shift_value); 2064 } else { 2065 __ Lsr(dst, lhs, shift_value); 2066 } 2067 } else { 2068 Register rhs_reg = dst.IsX() ? 
rhs.GetRegister().X() : rhs.GetRegister().W(); 2069 2070 if (instr->IsShl()) { 2071 __ Lsl(dst, lhs, rhs_reg); 2072 } else if (instr->IsShr()) { 2073 __ Asr(dst, lhs, rhs_reg); 2074 } else { 2075 __ Lsr(dst, lhs, rhs_reg); 2076 } 2077 } 2078 break; 2079 } 2080 default: 2081 LOG(FATAL) << "Unexpected shift operation type " << type; 2082 } 2083 } 2084 2085 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) { 2086 HandleBinaryOp(instruction); 2087 } 2088 2089 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) { 2090 HandleBinaryOp(instruction); 2091 } 2092 2093 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) { 2094 HandleBinaryOp(instruction); 2095 } 2096 2097 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { 2098 HandleBinaryOp(instruction); 2099 } 2100 2101 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { 2102 DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType(); 2103 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 2104 locations->SetInAt(0, Location::RequiresRegister()); 2105 // There is no immediate variant of negated bitwise instructions in AArch64. 2106 locations->SetInAt(1, Location::RequiresRegister()); 2107 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2108 } 2109 2110 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { 2111 Register dst = OutputRegister(instr); 2112 Register lhs = InputRegisterAt(instr, 0); 2113 Register rhs = InputRegisterAt(instr, 1); 2114 2115 switch (instr->GetOpKind()) { 2116 case HInstruction::kAnd: 2117 __ Bic(dst, lhs, rhs); 2118 break; 2119 case HInstruction::kOr: 2120 __ Orn(dst, lhs, rhs); 2121 break; 2122 case HInstruction::kXor: 2123 __ Eon(dst, lhs, rhs); 2124 break; 2125 default: 2126 LOG(FATAL) << "Unreachable"; 2127 } 2128 } 2129 2130 void LocationsBuilderARM64::VisitDataProcWithShifterOp( 2131 HDataProcWithShifterOp* instruction) { 2132 DCHECK(instruction->GetType() == DataType::Type::kInt32 || 2133 instruction->GetType() == DataType::Type::kInt64); 2134 LocationSummary* locations = 2135 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2136 if (instruction->GetInstrKind() == HInstruction::kNeg) { 2137 locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); 2138 } else { 2139 locations->SetInAt(0, Location::RequiresRegister()); 2140 } 2141 locations->SetInAt(1, Location::RequiresRegister()); 2142 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2143 } 2144 2145 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp( 2146 HDataProcWithShifterOp* instruction) { 2147 DataType::Type type = instruction->GetType(); 2148 HInstruction::InstructionKind kind = instruction->GetInstrKind(); 2149 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 2150 Register out = OutputRegister(instruction); 2151 Register left; 2152 if (kind != HInstruction::kNeg) { 2153 left = InputRegisterAt(instruction, 0); 2154 } 2155 // If this `HDataProcWithShifterOp` was created by merging a type conversion as the 2156 // shifter operand operation, the IR generating `right_reg` (input to the type 2157 // conversion) can have a different type from the current instruction's type, 2158 // so we manually indicate the type. 
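// For example, if an HAdd absorbed `b << 5` as its shifter operand, the whole node
// lowers to a single instruction like (illustrative registers)
//   add w0, w1, w2, lsl #5
// while extension op kinds use the extended-register forms, e.g. add x0, x1, w2, sxtw.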
2159 Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); 2160 Operand right_operand(0); 2161 2162 HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); 2163 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { 2164 right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); 2165 } else { 2166 right_operand = Operand(right_reg, 2167 helpers::ShiftFromOpKind(op_kind), 2168 instruction->GetShiftAmount()); 2169 } 2170 2171 // Logical binary operations do not support extension operations in the 2172 // operand. Note that VIXL would still manage if it was passed by generating 2173 // the extension as a separate instruction. 2174 // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. 2175 DCHECK(!right_operand.IsExtendedRegister() || 2176 (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor && 2177 kind != HInstruction::kNeg)); 2178 switch (kind) { 2179 case HInstruction::kAdd: 2180 __ Add(out, left, right_operand); 2181 break; 2182 case HInstruction::kAnd: 2183 __ And(out, left, right_operand); 2184 break; 2185 case HInstruction::kNeg: 2186 DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero()); 2187 __ Neg(out, right_operand); 2188 break; 2189 case HInstruction::kOr: 2190 __ Orr(out, left, right_operand); 2191 break; 2192 case HInstruction::kSub: 2193 __ Sub(out, left, right_operand); 2194 break; 2195 case HInstruction::kXor: 2196 __ Eor(out, left, right_operand); 2197 break; 2198 default: 2199 LOG(FATAL) << "Unexpected operation kind: " << kind; 2200 UNREACHABLE(); 2201 } 2202 } 2203 2204 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { 2205 LocationSummary* locations = 2206 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2207 locations->SetInAt(0, Location::RequiresRegister()); 2208 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction)); 2209 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2210 } 2211 2212 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { 2213 __ Add(OutputRegister(instruction), 2214 InputRegisterAt(instruction, 0), 2215 Operand(InputOperandAt(instruction, 1))); 2216 } 2217 2218 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) { 2219 LocationSummary* locations = 2220 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2221 2222 HIntConstant* shift = instruction->GetShift()->AsIntConstant(); 2223 2224 locations->SetInAt(0, Location::RequiresRegister()); 2225 // For byte case we don't need to shift the index variable so we can encode the data offset into 2226 // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist 2227 // data offset constant generation out of the loop and reduce the critical path length in the 2228 // loop. 2229 locations->SetInAt(1, shift->GetValue() == 0 2230 ? 
Location::ConstantLocation(instruction->GetOffset()->AsIntConstant()) 2231 : Location::RequiresRegister()); 2232 locations->SetInAt(2, Location::ConstantLocation(shift)); 2233 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2234 } 2235 2236 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( 2237 HIntermediateAddressIndex* instruction) { 2238 Register index_reg = InputRegisterAt(instruction, 0); 2239 uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2)); 2240 uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); 2241 2242 if (shift == 0) { 2243 __ Add(OutputRegister(instruction), index_reg, offset); 2244 } else { 2245 Register offset_reg = InputRegisterAt(instruction, 1); 2246 __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift)); 2247 } 2248 } 2249 2250 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { 2251 LocationSummary* locations = 2252 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall); 2253 HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); 2254 if (instr->GetOpKind() == HInstruction::kSub && 2255 accumulator->IsConstant() && 2256 accumulator->AsConstant()->IsArithmeticZero()) { 2257 // Don't allocate register for Mneg instruction. 2258 } else { 2259 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, 2260 Location::RequiresRegister()); 2261 } 2262 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); 2263 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); 2264 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2265 } 2266 2267 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { 2268 Register res = OutputRegister(instr); 2269 Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex); 2270 Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex); 2271 2272 // Avoid emitting code that could trigger Cortex A53's erratum 835769. 2273 // This fixup should be carried out for all multiply-accumulate instructions: 2274 // madd, msub, smaddl, smsubl, umaddl and umsubl. 2275 if (instr->GetType() == DataType::Type::kInt64 && 2276 codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { 2277 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); 2278 vixl::aarch64::Instruction* prev = 2279 masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize; 2280 if (prev->IsLoadOrStore()) { 2281 // Make sure we emit only exactly one nop. 
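// (Erratum 835769: on affected Cortex-A53 cores a 64-bit multiply-accumulate that
// immediately follows a load or store can compute a wrong result; the single nop
// emitted below breaks the pair. Illustrative result, with made-up registers:
//   ldr  x3, [x2]
//   nop
//   madd x0, x1, x4, x3 )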
2282 ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2283 __ nop(); 2284 } 2285 } 2286 2287 if (instr->GetOpKind() == HInstruction::kAdd) { 2288 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); 2289 __ Madd(res, mul_left, mul_right, accumulator); 2290 } else { 2291 DCHECK(instr->GetOpKind() == HInstruction::kSub); 2292 HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); 2293 if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) { 2294 __ Mneg(res, mul_left, mul_right); 2295 } else { 2296 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); 2297 __ Msub(res, mul_left, mul_right, accumulator); 2298 } 2299 } 2300 } 2301 2302 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { 2303 bool object_array_get_with_read_barrier = 2304 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 2305 LocationSummary* locations = 2306 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 2307 object_array_get_with_read_barrier 2308 ? LocationSummary::kCallOnSlowPath 2309 : LocationSummary::kNoCall); 2310 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 2311 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 2312 if (instruction->GetIndex()->IsConstant()) { 2313 // Array loads with constant index are treated as field loads. 2314 // We need a temporary register for the read barrier load in 2315 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() 2316 // only if the offset is too big. 2317 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 2318 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); 2319 offset += index << DataType::SizeShift(DataType::Type::kReference); 2320 if (offset >= kReferenceLoadMinFarOffset) { 2321 locations->AddTemp(FixedTempLocation()); 2322 } 2323 } else if (!instruction->GetArray()->IsIntermediateAddress()) { 2324 // We need a non-scratch temporary for the array data pointer in 2325 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no 2326 // intermediate address. 2327 locations->AddTemp(Location::RequiresRegister()); 2328 } 2329 } 2330 locations->SetInAt(0, Location::RequiresRegister()); 2331 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 2332 if (DataType::IsFloatingPointType(instruction->GetType())) { 2333 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2334 } else { 2335 // The output overlaps in the case of an object array get with 2336 // read barriers enabled: we do not want the move to overwrite the 2337 // array's location, as we need it to emit the read barrier. 2338 locations->SetOut( 2339 Location::RequiresRegister(), 2340 object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 2341 } 2342 } 2343 2344 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { 2345 DataType::Type type = instruction->GetType(); 2346 Register obj = InputRegisterAt(instruction, 0); 2347 LocationSummary* locations = instruction->GetLocations(); 2348 Location index = locations->InAt(1); 2349 Location out = locations->Out(); 2350 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 2351 const bool maybe_compressed_char_at = mirror::kUseStringCompression && 2352 instruction->IsStringCharAt(); 2353 MacroAssembler* masm = GetVIXLAssembler(); 2354 UseScratchRegisterScope temps(masm); 2355 2356 // The non-Baker read barrier instrumentation of object ArrayGet instructions 2357 // does not support the HIntermediateAddress instruction. 2358 DCHECK(!((type == DataType::Type::kReference) && 2359 instruction->GetArray()->IsIntermediateAddress() && 2360 kEmitCompilerReadBarrier && 2361 !kUseBakerReadBarrier)); 2362 2363 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2364 // Object ArrayGet with Baker's read barrier case. 2365 // Note that a potential implicit null check is handled in the 2366 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. 2367 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); 2368 if (index.IsConstant()) { 2369 DCHECK(!instruction->GetArray()->IsIntermediateAddress()); 2370 // Array load with a constant index can be treated as a field load. 2371 offset += Int64FromLocation(index) << DataType::SizeShift(type); 2372 Location maybe_temp = 2373 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); 2374 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, 2375 out, 2376 obj.W(), 2377 offset, 2378 maybe_temp, 2379 /* needs_null_check= */ false, 2380 /* use_load_acquire= */ false); 2381 } else { 2382 codegen_->GenerateArrayLoadWithBakerReadBarrier( 2383 instruction, out, obj.W(), offset, index, /* needs_null_check= */ false); 2384 } 2385 } else { 2386 // General case. 2387 MemOperand source = HeapOperand(obj); 2388 Register length; 2389 if (maybe_compressed_char_at) { 2390 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 2391 length = temps.AcquireW(); 2392 { 2393 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2394 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2395 2396 if (instruction->GetArray()->IsIntermediateAddress()) { 2397 DCHECK_LT(count_offset, offset); 2398 int64_t adjusted_offset = 2399 static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset); 2400 // Note that `adjusted_offset` is negative, so this will be a LDUR. 
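// E.g. if the data offset baked into the intermediate address is 16 and count_ lives
// at offset 8 (String layout assumed here only for illustration), this load becomes
//   ldur w16, [x2, #-8]
// i.e. an unscaled negative immediate; registers are illustrative.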
2401 __ Ldr(length, MemOperand(obj.X(), adjusted_offset)); 2402 } else { 2403 __ Ldr(length, HeapOperand(obj, count_offset)); 2404 } 2405 codegen_->MaybeRecordImplicitNullCheck(instruction); 2406 } 2407 } 2408 if (index.IsConstant()) { 2409 if (maybe_compressed_char_at) { 2410 vixl::aarch64::Label uncompressed_load, done; 2411 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 2412 "Expecting 0=compressed, 1=uncompressed"); 2413 __ Tbnz(length.W(), 0, &uncompressed_load); 2414 __ Ldrb(Register(OutputCPURegister(instruction)), 2415 HeapOperand(obj, offset + Int64FromLocation(index))); 2416 __ B(&done); 2417 __ Bind(&uncompressed_load); 2418 __ Ldrh(Register(OutputCPURegister(instruction)), 2419 HeapOperand(obj, offset + (Int64FromLocation(index) << 1))); 2420 __ Bind(&done); 2421 } else { 2422 offset += Int64FromLocation(index) << DataType::SizeShift(type); 2423 source = HeapOperand(obj, offset); 2424 } 2425 } else { 2426 Register temp = temps.AcquireSameSizeAs(obj); 2427 if (instruction->GetArray()->IsIntermediateAddress()) { 2428 // We do not need to compute the intermediate address from the array: the 2429 // input instruction has done it already. See the comment in 2430 // `TryExtractArrayAccessAddress()`. 2431 if (kIsDebugBuild) { 2432 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); 2433 DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); 2434 } 2435 temp = obj; 2436 } else { 2437 __ Add(temp, obj, offset); 2438 } 2439 if (maybe_compressed_char_at) { 2440 vixl::aarch64::Label uncompressed_load, done; 2441 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 2442 "Expecting 0=compressed, 1=uncompressed"); 2443 __ Tbnz(length.W(), 0, &uncompressed_load); 2444 __ Ldrb(Register(OutputCPURegister(instruction)), 2445 HeapOperand(temp, XRegisterFrom(index), LSL, 0)); 2446 __ B(&done); 2447 __ Bind(&uncompressed_load); 2448 __ Ldrh(Register(OutputCPURegister(instruction)), 2449 HeapOperand(temp, XRegisterFrom(index), LSL, 1)); 2450 __ Bind(&done); 2451 } else { 2452 source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type)); 2453 } 2454 } 2455 if (!maybe_compressed_char_at) { 2456 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
2457 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2458 codegen_->Load(type, OutputCPURegister(instruction), source); 2459 codegen_->MaybeRecordImplicitNullCheck(instruction); 2460 } 2461 2462 if (type == DataType::Type::kReference) { 2463 static_assert( 2464 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 2465 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 2466 Location obj_loc = locations->InAt(0); 2467 if (index.IsConstant()) { 2468 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset); 2469 } else { 2470 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index); 2471 } 2472 } 2473 } 2474 } 2475 2476 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { 2477 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 2478 locations->SetInAt(0, Location::RequiresRegister()); 2479 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2480 } 2481 2482 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { 2483 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); 2484 vixl::aarch64::Register out = OutputRegister(instruction); 2485 { 2486 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2487 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2488 __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset)); 2489 codegen_->MaybeRecordImplicitNullCheck(instruction); 2490 } 2491 // Mask out compression flag from String's array length. 2492 if (mirror::kUseStringCompression && instruction->IsStringLength()) { 2493 __ Lsr(out.W(), out.W(), 1u); 2494 } 2495 } 2496 2497 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { 2498 DataType::Type value_type = instruction->GetComponentType(); 2499 2500 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 2501 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 2502 instruction, 2503 may_need_runtime_call_for_type_check ? 
2504 LocationSummary::kCallOnSlowPath : 2505 LocationSummary::kNoCall); 2506 locations->SetInAt(0, Location::RequiresRegister()); 2507 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 2508 if (IsConstantZeroBitPattern(instruction->InputAt(2))) { 2509 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 2510 } else if (DataType::IsFloatingPointType(value_type)) { 2511 locations->SetInAt(2, Location::RequiresFpuRegister()); 2512 } else { 2513 locations->SetInAt(2, Location::RequiresRegister()); 2514 } 2515 } 2516 2517 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { 2518 DataType::Type value_type = instruction->GetComponentType(); 2519 LocationSummary* locations = instruction->GetLocations(); 2520 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 2521 bool needs_write_barrier = 2522 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 2523 2524 Register array = InputRegisterAt(instruction, 0); 2525 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2); 2526 CPURegister source = value; 2527 Location index = locations->InAt(1); 2528 size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); 2529 MemOperand destination = HeapOperand(array); 2530 MacroAssembler* masm = GetVIXLAssembler(); 2531 2532 if (!needs_write_barrier) { 2533 DCHECK(!may_need_runtime_call_for_type_check); 2534 if (index.IsConstant()) { 2535 offset += Int64FromLocation(index) << DataType::SizeShift(value_type); 2536 destination = HeapOperand(array, offset); 2537 } else { 2538 UseScratchRegisterScope temps(masm); 2539 Register temp = temps.AcquireSameSizeAs(array); 2540 if (instruction->GetArray()->IsIntermediateAddress()) { 2541 // We do not need to compute the intermediate address from the array: the 2542 // input instruction has done it already. See the comment in 2543 // `TryExtractArrayAccessAddress()`. 2544 if (kIsDebugBuild) { 2545 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); 2546 DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); 2547 } 2548 temp = array; 2549 } else { 2550 __ Add(temp, array, offset); 2551 } 2552 destination = HeapOperand(temp, 2553 XRegisterFrom(index), 2554 LSL, 2555 DataType::SizeShift(value_type)); 2556 } 2557 { 2558 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2559 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2560 codegen_->Store(value_type, value, destination); 2561 codegen_->MaybeRecordImplicitNullCheck(instruction); 2562 } 2563 } else { 2564 DCHECK(!instruction->GetArray()->IsIntermediateAddress()); 2565 vixl::aarch64::Label done; 2566 SlowPathCodeARM64* slow_path = nullptr; 2567 { 2568 // We use a block to end the scratch scope before the write barrier, thus 2569 // freeing the temporary registers so they can be used in `MarkGCCard`. 
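// MarkGCCard needs those scratch registers itself; as a rough sketch (registers are
// illustrative, the offsets come from Thread and the card table at runtime) it emits:
//   cbz  w2, .Ldone                       // only when the value can be null
//   ldr  x16, [tr, #<card table offset>]
//   lsr  w17, w1, #<card shift>
//   strb w16, [x16, x17]
// .Ldone: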
2570 UseScratchRegisterScope temps(masm); 2571 Register temp = temps.AcquireSameSizeAs(array); 2572 if (index.IsConstant()) { 2573 offset += Int64FromLocation(index) << DataType::SizeShift(value_type); 2574 destination = HeapOperand(array, offset); 2575 } else { 2576 destination = HeapOperand(temp, 2577 XRegisterFrom(index), 2578 LSL, 2579 DataType::SizeShift(value_type)); 2580 } 2581 2582 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2583 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2584 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2585 2586 if (may_need_runtime_call_for_type_check) { 2587 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction); 2588 codegen_->AddSlowPath(slow_path); 2589 if (instruction->GetValueCanBeNull()) { 2590 vixl::aarch64::Label non_zero; 2591 __ Cbnz(Register(value), &non_zero); 2592 if (!index.IsConstant()) { 2593 __ Add(temp, array, offset); 2594 } 2595 { 2596 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools 2597 // emitted. 2598 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2599 __ Str(wzr, destination); 2600 codegen_->MaybeRecordImplicitNullCheck(instruction); 2601 } 2602 __ B(&done); 2603 __ Bind(&non_zero); 2604 } 2605 2606 // Note that when Baker read barriers are enabled, the type 2607 // checks are performed without read barriers. This is fine, 2608 // even in the case where a class object is in the from-space 2609 // after the flip, as a comparison involving such a type would 2610 // not produce a false positive; it may of course produce a 2611 // false negative, in which case we would take the ArraySet 2612 // slow path. 2613 2614 Register temp2 = temps.AcquireSameSizeAs(array); 2615 // /* HeapReference<Class> */ temp = array->klass_ 2616 { 2617 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2618 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2619 __ Ldr(temp, HeapOperand(array, class_offset)); 2620 codegen_->MaybeRecordImplicitNullCheck(instruction); 2621 } 2622 GetAssembler()->MaybeUnpoisonHeapReference(temp); 2623 2624 // /* HeapReference<Class> */ temp = temp->component_type_ 2625 __ Ldr(temp, HeapOperand(temp, component_offset)); 2626 // /* HeapReference<Class> */ temp2 = value->klass_ 2627 __ Ldr(temp2, HeapOperand(Register(value), class_offset)); 2628 // If heap poisoning is enabled, no need to unpoison `temp` 2629 // nor `temp2`, as we are comparing two poisoned references. 2630 __ Cmp(temp, temp2); 2631 temps.Release(temp2); 2632 2633 if (instruction->StaticTypeOfArrayIsObjectArray()) { 2634 vixl::aarch64::Label do_put; 2635 __ B(eq, &do_put); 2636 // If heap poisoning is enabled, the `temp` reference has 2637 // not been unpoisoned yet; unpoison it now. 2638 GetAssembler()->MaybeUnpoisonHeapReference(temp); 2639 2640 // /* HeapReference<Class> */ temp = temp->super_class_ 2641 __ Ldr(temp, HeapOperand(temp, super_offset)); 2642 // If heap poisoning is enabled, no need to unpoison 2643 // `temp`, as we are comparing against null below. 
2644 __ Cbnz(temp, slow_path->GetEntryLabel()); 2645 __ Bind(&do_put); 2646 } else { 2647 __ B(ne, slow_path->GetEntryLabel()); 2648 } 2649 } 2650 2651 if (kPoisonHeapReferences) { 2652 Register temp2 = temps.AcquireSameSizeAs(array); 2653 DCHECK(value.IsW()); 2654 __ Mov(temp2, value.W()); 2655 GetAssembler()->PoisonHeapReference(temp2); 2656 source = temp2; 2657 } 2658 2659 if (!index.IsConstant()) { 2660 __ Add(temp, array, offset); 2661 } else { 2662 // We no longer need the `temp` here so release it as the store below may 2663 // need a scratch register (if the constant index makes the offset too large) 2664 // and the poisoned `source` could be using the other scratch register. 2665 temps.Release(temp); 2666 } 2667 { 2668 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2669 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2670 __ Str(source, destination); 2671 2672 if (!may_need_runtime_call_for_type_check) { 2673 codegen_->MaybeRecordImplicitNullCheck(instruction); 2674 } 2675 } 2676 } 2677 2678 codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull()); 2679 2680 if (done.IsLinked()) { 2681 __ Bind(&done); 2682 } 2683 2684 if (slow_path != nullptr) { 2685 __ Bind(slow_path->GetExitLabel()); 2686 } 2687 } 2688 } 2689 2690 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { 2691 RegisterSet caller_saves = RegisterSet::Empty(); 2692 InvokeRuntimeCallingConvention calling_convention; 2693 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 2694 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode())); 2695 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); 2696 locations->SetInAt(0, Location::RequiresRegister()); 2697 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); 2698 } 2699 2700 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) { 2701 BoundsCheckSlowPathARM64* slow_path = 2702 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction); 2703 codegen_->AddSlowPath(slow_path); 2704 __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); 2705 __ B(slow_path->GetEntryLabel(), hs); 2706 } 2707 2708 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { 2709 LocationSummary* locations = 2710 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 2711 locations->SetInAt(0, Location::RequiresRegister()); 2712 if (check->HasUses()) { 2713 locations->SetOut(Location::SameAsFirstInput()); 2714 } 2715 // Rely on the type initialization to save everything we need. 2716 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 2717 } 2718 2719 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { 2720 // We assume the class is not null. 
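// GenerateClassInitializationCheck() called below reduces to an acquire load of the
// class status byte plus a compare-and-branch, roughly (offsets and registers are
// illustrative):
//   add   w16, w0, #<status byte offset>
//   ldarb w16, [x16]
//   cmp   w16, #<shifted kInitialized value>
//   b.lo  <slow path entry>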
2721 SlowPathCodeARM64* slow_path = 2722 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check); 2723 codegen_->AddSlowPath(slow_path); 2724 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); 2725 } 2726 2727 static bool IsFloatingPointZeroConstant(HInstruction* inst) { 2728 return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero())) 2729 || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero())); 2730 } 2731 2732 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) { 2733 FPRegister lhs_reg = InputFPRegisterAt(instruction, 0); 2734 Location rhs_loc = instruction->GetLocations()->InAt(1); 2735 if (rhs_loc.IsConstant()) { 2736 // 0.0 is the only immediate that can be encoded directly in 2737 // an FCMP instruction. 2738 // 2739 // Both the JLS (section 15.20.1) and the JVMS (section 6.5) 2740 // specify that in a floating-point comparison, positive zero 2741 // and negative zero are considered equal, so we can use the 2742 // literal 0.0 for both cases here. 2743 // 2744 // Note however that some methods (Float.equal, Float.compare, 2745 // Float.compareTo, Double.equal, Double.compare, 2746 // Double.compareTo, Math.max, Math.min, StrictMath.max, 2747 // StrictMath.min) consider 0.0 to be (strictly) greater than 2748 // -0.0. So if we ever translate calls to these methods into a 2749 // HCompare instruction, we must handle the -0.0 case with 2750 // care here. 2751 DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant())); 2752 __ Fcmp(lhs_reg, 0.0); 2753 } else { 2754 __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1)); 2755 } 2756 } 2757 2758 void LocationsBuilderARM64::VisitCompare(HCompare* compare) { 2759 LocationSummary* locations = 2760 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); 2761 DataType::Type in_type = compare->InputAt(0)->GetType(); 2762 switch (in_type) { 2763 case DataType::Type::kBool: 2764 case DataType::Type::kUint8: 2765 case DataType::Type::kInt8: 2766 case DataType::Type::kUint16: 2767 case DataType::Type::kInt16: 2768 case DataType::Type::kInt32: 2769 case DataType::Type::kInt64: { 2770 locations->SetInAt(0, Location::RequiresRegister()); 2771 locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare)); 2772 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2773 break; 2774 } 2775 case DataType::Type::kFloat32: 2776 case DataType::Type::kFloat64: { 2777 locations->SetInAt(0, Location::RequiresFpuRegister()); 2778 locations->SetInAt(1, 2779 IsFloatingPointZeroConstant(compare->InputAt(1)) 2780 ? 
Location::ConstantLocation(compare->InputAt(1)->AsConstant()) 2781 : Location::RequiresFpuRegister()); 2782 locations->SetOut(Location::RequiresRegister()); 2783 break; 2784 } 2785 default: 2786 LOG(FATAL) << "Unexpected type for compare operation " << in_type; 2787 } 2788 } 2789 2790 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { 2791 DataType::Type in_type = compare->InputAt(0)->GetType(); 2792 2793 // 0 if: left == right 2794 // 1 if: left > right 2795 // -1 if: left < right 2796 switch (in_type) { 2797 case DataType::Type::kBool: 2798 case DataType::Type::kUint8: 2799 case DataType::Type::kInt8: 2800 case DataType::Type::kUint16: 2801 case DataType::Type::kInt16: 2802 case DataType::Type::kInt32: 2803 case DataType::Type::kInt64: { 2804 Register result = OutputRegister(compare); 2805 Register left = InputRegisterAt(compare, 0); 2806 Operand right = InputOperandAt(compare, 1); 2807 __ Cmp(left, right); 2808 __ Cset(result, ne); // result == +1 if NE or 0 otherwise 2809 __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise 2810 break; 2811 } 2812 case DataType::Type::kFloat32: 2813 case DataType::Type::kFloat64: { 2814 Register result = OutputRegister(compare); 2815 GenerateFcmp(compare); 2816 __ Cset(result, ne); 2817 __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias())); 2818 break; 2819 } 2820 default: 2821 LOG(FATAL) << "Unimplemented compare type " << in_type; 2822 } 2823 } 2824 2825 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { 2826 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 2827 2828 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { 2829 locations->SetInAt(0, Location::RequiresFpuRegister()); 2830 locations->SetInAt(1, 2831 IsFloatingPointZeroConstant(instruction->InputAt(1)) 2832 ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant()) 2833 : Location::RequiresFpuRegister()); 2834 } else { 2835 // Integer cases. 2836 locations->SetInAt(0, Location::RequiresRegister()); 2837 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); 2838 } 2839 2840 if (!instruction->IsEmittedAtUseSite()) { 2841 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2842 } 2843 } 2844 2845 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { 2846 if (instruction->IsEmittedAtUseSite()) { 2847 return; 2848 } 2849 2850 LocationSummary* locations = instruction->GetLocations(); 2851 Register res = RegisterFrom(locations->Out(), instruction->GetType()); 2852 IfCondition if_cond = instruction->GetCondition(); 2853 2854 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { 2855 GenerateFcmp(instruction); 2856 __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias())); 2857 } else { 2858 // Integer cases. 
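// For example, materializing `a < b` for two int32 inputs emits roughly the following
// (register assignments are illustrative only):
//   cmp  w0, w1
//   cset w2, lt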
2859 Register lhs = InputRegisterAt(instruction, 0); 2860 Operand rhs = InputOperandAt(instruction, 1); 2861 __ Cmp(lhs, rhs); 2862 __ Cset(res, ARM64Condition(if_cond)); 2863 } 2864 } 2865 2866 #define FOR_EACH_CONDITION_INSTRUCTION(M) \ 2867 M(Equal) \ 2868 M(NotEqual) \ 2869 M(LessThan) \ 2870 M(LessThanOrEqual) \ 2871 M(GreaterThan) \ 2872 M(GreaterThanOrEqual) \ 2873 M(Below) \ 2874 M(BelowOrEqual) \ 2875 M(Above) \ 2876 M(AboveOrEqual) 2877 #define DEFINE_CONDITION_VISITORS(Name) \ 2878 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \ 2879 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } 2880 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) 2881 #undef DEFINE_CONDITION_VISITORS 2882 #undef FOR_EACH_CONDITION_INSTRUCTION 2883 2884 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) { 2885 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); 2886 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); 2887 DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; 2888 2889 Register out = OutputRegister(instruction); 2890 Register dividend = InputRegisterAt(instruction, 0); 2891 2892 if (abs_imm == 2) { 2893 int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte; 2894 __ Add(out, dividend, Operand(dividend, LSR, bits - 1)); 2895 } else { 2896 UseScratchRegisterScope temps(GetVIXLAssembler()); 2897 Register temp = temps.AcquireSameSizeAs(out); 2898 __ Add(temp, dividend, abs_imm - 1); 2899 __ Cmp(dividend, 0); 2900 __ Csel(out, temp, dividend, lt); 2901 } 2902 2903 int ctz_imm = CTZ(abs_imm); 2904 if (imm > 0) { 2905 __ Asr(out, out, ctz_imm); 2906 } else { 2907 __ Neg(out, Operand(out, ASR, ctz_imm)); 2908 } 2909 } 2910 2911 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { 2912 DCHECK(instruction->IsDiv() || instruction->IsRem()); 2913 2914 LocationSummary* locations = instruction->GetLocations(); 2915 Location second = locations->InAt(1); 2916 DCHECK(second.IsConstant()); 2917 2918 Register out = OutputRegister(instruction); 2919 Register dividend = InputRegisterAt(instruction, 0); 2920 int64_t imm = Int64FromConstant(second.GetConstant()); 2921 2922 DataType::Type type = instruction->GetResultType(); 2923 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 2924 2925 int64_t magic; 2926 int shift; 2927 CalculateMagicAndShiftForDivRem( 2928 imm, /* is_long= */ type == DataType::Type::kInt64, &magic, &shift); 2929 2930 UseScratchRegisterScope temps(GetVIXLAssembler()); 2931 Register temp = temps.AcquireSameSizeAs(out); 2932 2933 // temp = get_high(dividend * magic) 2934 __ Mov(temp, magic); 2935 if (type == DataType::Type::kInt64) { 2936 __ Smulh(temp, dividend, temp); 2937 } else { 2938 __ Smull(temp.X(), dividend, temp); 2939 __ Lsr(temp.X(), temp.X(), 32); 2940 } 2941 2942 if (imm > 0 && magic < 0) { 2943 __ Add(temp, temp, dividend); 2944 } else if (imm < 0 && magic > 0) { 2945 __ Sub(temp, temp, dividend); 2946 } 2947 2948 if (shift != 0) { 2949 __ Asr(temp, temp, shift); 2950 } 2951 2952 if (instruction->IsDiv()) { 2953 __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31)); 2954 } else { 2955 __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31)); 2956 // TODO: Strength reduction for msub. 
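// The remainder is computed as `dividend - quotient * imm`; the MSUB emitted below does this
// in a single instruction (out = dividend - temp * temp_imm).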
2957 Register temp_imm = temps.AcquireSameSizeAs(out); 2958 __ Mov(temp_imm, imm); 2959 __ Msub(out, temp, temp_imm, dividend); 2960 } 2961 } 2962 2963 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv* instruction) { 2964 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); 2965 2966 if (imm == 0) { 2967 // Do not generate anything. DivZeroCheck would prevent any code from being executed. 2968 return; 2969 } 2970 2971 if (IsPowerOfTwo(AbsOrMin(imm))) { 2972 GenerateIntDivForPower2Denom(instruction); 2973 } else { 2974 // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier. 2975 DCHECK(imm < -2 || imm > 2) << imm; 2976 GenerateDivRemWithAnyConstant(instruction); 2977 } 2978 } 2979 2980 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv* instruction) { 2981 DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) 2982 << instruction->GetResultType(); 2983 2984 if (instruction->GetLocations()->InAt(1).IsConstant()) { 2985 GenerateIntDivForConstDenom(instruction); 2986 } else { 2987 Register out = OutputRegister(instruction); 2988 Register dividend = InputRegisterAt(instruction, 0); 2989 Register divisor = InputRegisterAt(instruction, 1); 2990 __ Sdiv(out, dividend, divisor); 2991 } 2992 } 2993 2994 void LocationsBuilderARM64::VisitDiv(HDiv* div) { 2995 LocationSummary* locations = 2996 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall); 2997 switch (div->GetResultType()) { 2998 case DataType::Type::kInt32: 2999 case DataType::Type::kInt64: 3000 locations->SetInAt(0, Location::RequiresRegister()); 3001 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); 3002 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3003 break; 3004 3005 case DataType::Type::kFloat32: 3006 case DataType::Type::kFloat64: 3007 locations->SetInAt(0, Location::RequiresFpuRegister()); 3008 locations->SetInAt(1, Location::RequiresFpuRegister()); 3009 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3010 break; 3011 3012 default: 3013 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3014 } 3015 } 3016 3017 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { 3018 DataType::Type type = div->GetResultType(); 3019 switch (type) { 3020 case DataType::Type::kInt32: 3021 case DataType::Type::kInt64: 3022 GenerateIntDiv(div); 3023 break; 3024 3025 case DataType::Type::kFloat32: 3026 case DataType::Type::kFloat64: 3027 __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1)); 3028 break; 3029 3030 default: 3031 LOG(FATAL) << "Unexpected div type " << type; 3032 } 3033 } 3034 3035 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 3036 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 3037 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); 3038 } 3039 3040 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 3041 SlowPathCodeARM64* slow_path = 3042 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction); 3043 codegen_->AddSlowPath(slow_path); 3044 Location value = instruction->GetLocations()->InAt(0); 3045 3046 DataType::Type type = instruction->GetType(); 3047 3048 if (!DataType::IsIntegralType(type)) { 3049 LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; 3050 UNREACHABLE(); 3051 } 3052 3053 if (value.IsConstant()) { 3054 int64_t divisor = Int64FromLocation(value); 3055 if 
(divisor == 0) { 3056 __ B(slow_path->GetEntryLabel()); 3057 } else { 3058 // A division by a non-zero constant is valid. We don't need to perform 3059 // any check, so simply fall through. 3060 } 3061 } else { 3062 __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); 3063 } 3064 } 3065 3066 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) { 3067 LocationSummary* locations = 3068 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3069 locations->SetOut(Location::ConstantLocation(constant)); 3070 } 3071 3072 void InstructionCodeGeneratorARM64::VisitDoubleConstant( 3073 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 3074 // Will be generated at use site. 3075 } 3076 3077 void LocationsBuilderARM64::VisitExit(HExit* exit) { 3078 exit->SetLocations(nullptr); 3079 } 3080 3081 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 3082 } 3083 3084 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { 3085 LocationSummary* locations = 3086 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3087 locations->SetOut(Location::ConstantLocation(constant)); 3088 } 3089 3090 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 3091 // Will be generated at use site. 3092 } 3093 3094 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) { 3095 if (successor->IsExitBlock()) { 3096 DCHECK(got->GetPrevious()->AlwaysThrows()); 3097 return; // no code needed 3098 } 3099 3100 HBasicBlock* block = got->GetBlock(); 3101 HInstruction* previous = got->GetPrevious(); 3102 HLoopInformation* info = block->GetLoopInformation(); 3103 3104 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 3105 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { 3106 UseScratchRegisterScope temps(GetVIXLAssembler()); 3107 Register temp1 = temps.AcquireX(); 3108 Register temp2 = temps.AcquireX(); 3109 __ Ldr(temp1, MemOperand(sp, 0)); 3110 __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); 3111 __ Add(temp2, temp2, 1); 3112 __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); 3113 } 3114 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 3115 return; 3116 } 3117 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 3118 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 3119 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 3120 } 3121 if (!codegen_->GoesToNextBlock(block, successor)) { 3122 __ B(codegen_->GetLabelOf(successor)); 3123 } 3124 } 3125 3126 void LocationsBuilderARM64::VisitGoto(HGoto* got) { 3127 got->SetLocations(nullptr); 3128 } 3129 3130 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { 3131 HandleGoto(got, got->GetSuccessor()); 3132 } 3133 3134 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) { 3135 try_boundary->SetLocations(nullptr); 3136 } 3137 3138 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) { 3139 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); 3140 if (!successor->IsExitBlock()) { 3141 HandleGoto(try_boundary, successor); 3142 } 3143 } 3144 3145 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, 3146 size_t condition_input_index, 3147 vixl::aarch64::Label* true_target, 3148 vixl::aarch64::Label* 
false_target) { 3149 HInstruction* cond = instruction->InputAt(condition_input_index); 3150 3151 if (true_target == nullptr && false_target == nullptr) { 3152 // Nothing to do. The code always falls through. 3153 return; 3154 } else if (cond->IsIntConstant()) { 3155 // Constant condition, statically compared against "true" (integer value 1). 3156 if (cond->AsIntConstant()->IsTrue()) { 3157 if (true_target != nullptr) { 3158 __ B(true_target); 3159 } 3160 } else { 3161 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 3162 if (false_target != nullptr) { 3163 __ B(false_target); 3164 } 3165 } 3166 return; 3167 } 3168 3169 // The following code generates these patterns: 3170 // (1) true_target == nullptr && false_target != nullptr 3171 // - opposite condition true => branch to false_target 3172 // (2) true_target != nullptr && false_target == nullptr 3173 // - condition true => branch to true_target 3174 // (3) true_target != nullptr && false_target != nullptr 3175 // - condition true => branch to true_target 3176 // - branch to false_target 3177 if (IsBooleanValueOrMaterializedCondition(cond)) { 3178 // The condition instruction has been materialized, compare the output to 0. 3179 Location cond_val = instruction->GetLocations()->InAt(condition_input_index); 3180 DCHECK(cond_val.IsRegister()); 3181 if (true_target == nullptr) { 3182 __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); 3183 } else { 3184 __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); 3185 } 3186 } else { 3187 // The condition instruction has not been materialized, use its inputs as 3188 // the comparison and its condition as the branch condition. 3189 HCondition* condition = cond->AsCondition(); 3190 3191 DataType::Type type = condition->InputAt(0)->GetType(); 3192 if (DataType::IsFloatingPointType(type)) { 3193 GenerateFcmp(condition); 3194 if (true_target == nullptr) { 3195 IfCondition opposite_condition = condition->GetOppositeCondition(); 3196 __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target); 3197 } else { 3198 __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target); 3199 } 3200 } else { 3201 // Integer cases. 3202 Register lhs = InputRegisterAt(condition, 0); 3203 Operand rhs = InputOperandAt(condition, 1); 3204 3205 Condition arm64_cond; 3206 vixl::aarch64::Label* non_fallthrough_target; 3207 if (true_target == nullptr) { 3208 arm64_cond = ARM64Condition(condition->GetOppositeCondition()); 3209 non_fallthrough_target = false_target; 3210 } else { 3211 arm64_cond = ARM64Condition(condition->GetCondition()); 3212 non_fallthrough_target = true_target; 3213 } 3214 3215 if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) && 3216 rhs.IsImmediate() && (rhs.GetImmediate() == 0)) { 3217 switch (arm64_cond) { 3218 case eq: 3219 __ Cbz(lhs, non_fallthrough_target); 3220 break; 3221 case ne: 3222 __ Cbnz(lhs, non_fallthrough_target); 3223 break; 3224 case lt: 3225 // Test the sign bit and branch accordingly. 3226 __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); 3227 break; 3228 case ge: 3229 // Test the sign bit and branch accordingly. 3230 __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); 3231 break; 3232 default: 3233 // Without the `static_cast` the compiler throws an error for 3234 // `-Werror=sign-promo`. 
3235 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); 3236 } 3237 } else { 3238 __ Cmp(lhs, rhs); 3239 __ B(arm64_cond, non_fallthrough_target); 3240 } 3241 } 3242 } 3243 3244 // If neither branch falls through (case 3), the conditional branch to `true_target` 3245 // was already emitted (case 2) and we need to emit a jump to `false_target`. 3246 if (true_target != nullptr && false_target != nullptr) { 3247 __ B(false_target); 3248 } 3249 } 3250 3251 void LocationsBuilderARM64::VisitIf(HIf* if_instr) { 3252 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); 3253 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 3254 locations->SetInAt(0, Location::RequiresRegister()); 3255 } 3256 } 3257 3258 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { 3259 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 3260 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 3261 vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor); 3262 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) { 3263 true_target = nullptr; 3264 } 3265 vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor); 3266 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) { 3267 false_target = nullptr; 3268 } 3269 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); 3270 } 3271 3272 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 3273 LocationSummary* locations = new (GetGraph()->GetAllocator()) 3274 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 3275 InvokeRuntimeCallingConvention calling_convention; 3276 RegisterSet caller_saves = RegisterSet::Empty(); 3277 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 3278 locations->SetCustomSlowPathCallerSaves(caller_saves); 3279 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 3280 locations->SetInAt(0, Location::RequiresRegister()); 3281 } 3282 } 3283 3284 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 3285 SlowPathCodeARM64* slow_path = 3286 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); 3287 GenerateTestAndBranch(deoptimize, 3288 /* condition_input_index= */ 0, 3289 slow_path->GetEntryLabel(), 3290 /* false_target= */ nullptr); 3291 } 3292 3293 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3294 LocationSummary* locations = new (GetGraph()->GetAllocator()) 3295 LocationSummary(flag, LocationSummary::kNoCall); 3296 locations->SetOut(Location::RequiresRegister()); 3297 } 3298 3299 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3300 __ Ldr(OutputRegister(flag), 3301 MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 3302 } 3303 3304 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { 3305 return condition->IsCondition() && 3306 DataType::IsFloatingPointType(condition->InputAt(0)->GetType()); 3307 } 3308 3309 static inline Condition GetConditionForSelect(HCondition* condition) { 3310 IfCondition cond = condition->AsCondition()->GetCondition(); 3311 return IsConditionOnFloatingPointValues(condition) ? 
ARM64FPCondition(cond, condition->IsGtBias()) 3312 : ARM64Condition(cond); 3313 } 3314 3315 void LocationsBuilderARM64::VisitSelect(HSelect* select) { 3316 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); 3317 if (DataType::IsFloatingPointType(select->GetType())) { 3318 locations->SetInAt(0, Location::RequiresFpuRegister()); 3319 locations->SetInAt(1, Location::RequiresFpuRegister()); 3320 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3321 } else { 3322 HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); 3323 HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); 3324 bool is_true_value_constant = cst_true_value != nullptr; 3325 bool is_false_value_constant = cst_false_value != nullptr; 3326 // Ask VIXL whether we should synthesize constants in registers. 3327 // We give an arbitrary register to VIXL when dealing with non-constant inputs. 3328 Operand true_op = is_true_value_constant ? 3329 Operand(Int64FromConstant(cst_true_value)) : Operand(x1); 3330 Operand false_op = is_false_value_constant ? 3331 Operand(Int64FromConstant(cst_false_value)) : Operand(x2); 3332 bool true_value_in_register = false; 3333 bool false_value_in_register = false; 3334 MacroAssembler::GetCselSynthesisInformation( 3335 x0, true_op, false_op, &true_value_in_register, &false_value_in_register); 3336 true_value_in_register |= !is_true_value_constant; 3337 false_value_in_register |= !is_false_value_constant; 3338 3339 locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister() 3340 : Location::ConstantLocation(cst_true_value)); 3341 locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister() 3342 : Location::ConstantLocation(cst_false_value)); 3343 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3344 } 3345 3346 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 3347 locations->SetInAt(2, Location::RequiresRegister()); 3348 } 3349 } 3350 3351 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { 3352 HInstruction* cond = select->GetCondition(); 3353 Condition csel_cond; 3354 3355 if (IsBooleanValueOrMaterializedCondition(cond)) { 3356 if (cond->IsCondition() && cond->GetNext() == select) { 3357 // Use the condition flags set by the previous instruction. 3358 csel_cond = GetConditionForSelect(cond->AsCondition()); 3359 } else { 3360 __ Cmp(InputRegisterAt(select, 2), 0); 3361 csel_cond = ne; 3362 } 3363 } else if (IsConditionOnFloatingPointValues(cond)) { 3364 GenerateFcmp(cond); 3365 csel_cond = GetConditionForSelect(cond->AsCondition()); 3366 } else { 3367 __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1)); 3368 csel_cond = GetConditionForSelect(cond->AsCondition()); 3369 } 3370 3371 if (DataType::IsFloatingPointType(select->GetType())) { 3372 __ Fcsel(OutputFPRegister(select), 3373 InputFPRegisterAt(select, 1), 3374 InputFPRegisterAt(select, 0), 3375 csel_cond); 3376 } else { 3377 __ Csel(OutputRegister(select), 3378 InputOperandAt(select, 1), 3379 InputOperandAt(select, 0), 3380 csel_cond); 3381 } 3382 } 3383 3384 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 3385 new (GetGraph()->GetAllocator()) LocationSummary(info); 3386 } 3387 3388 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) { 3389 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 
3390 } 3391 3392 void CodeGeneratorARM64::GenerateNop() { 3393 __ Nop(); 3394 } 3395 3396 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3397 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3398 } 3399 3400 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3401 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3402 } 3403 3404 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3405 HandleFieldSet(instruction); 3406 } 3407 3408 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3409 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 3410 } 3411 3412 // Temp is used for read barrier. 3413 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { 3414 if (kEmitCompilerReadBarrier && 3415 (kUseBakerReadBarrier || 3416 type_check_kind == TypeCheckKind::kAbstractClassCheck || 3417 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 3418 type_check_kind == TypeCheckKind::kArrayObjectCheck)) { 3419 return 1; 3420 } 3421 return 0; 3422 } 3423 3424 // Interface case has 3 temps, one for holding the number of interfaces, one for the current 3425 // interface pointer, one for loading the current interface. 3426 // The other checks have one temp for loading the object's class. 3427 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { 3428 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 3429 return 3; 3430 } 3431 return 1 + NumberOfInstanceOfTemps(type_check_kind); 3432 } 3433 3434 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { 3435 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 3436 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3437 bool baker_read_barrier_slow_path = false; 3438 switch (type_check_kind) { 3439 case TypeCheckKind::kExactCheck: 3440 case TypeCheckKind::kAbstractClassCheck: 3441 case TypeCheckKind::kClassHierarchyCheck: 3442 case TypeCheckKind::kArrayObjectCheck: { 3443 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); 3444 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 3445 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; 3446 break; 3447 } 3448 case TypeCheckKind::kArrayCheck: 3449 case TypeCheckKind::kUnresolvedCheck: 3450 case TypeCheckKind::kInterfaceCheck: 3451 call_kind = LocationSummary::kCallOnSlowPath; 3452 break; 3453 case TypeCheckKind::kBitstringCheck: 3454 break; 3455 } 3456 3457 LocationSummary* locations = 3458 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 3459 if (baker_read_barrier_slow_path) { 3460 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 3461 } 3462 locations->SetInAt(0, Location::RequiresRegister()); 3463 if (type_check_kind == TypeCheckKind::kBitstringCheck) { 3464 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 3465 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 3466 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 3467 } else { 3468 locations->SetInAt(1, Location::RequiresRegister()); 3469 } 3470 // The "out" register is used as a temporary, so it overlaps with the inputs. 3471 // Note that TypeCheckSlowPathARM64 uses this register too. 
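// (The overlap is required because `out` is clobbered with the object's class and then walked
// while the class input (input 1) is still live and being compared against.)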
3472 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 3473 // Add temps if necessary for read barriers. 3474 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); 3475 } 3476 3477 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { 3478 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3479 LocationSummary* locations = instruction->GetLocations(); 3480 Location obj_loc = locations->InAt(0); 3481 Register obj = InputRegisterAt(instruction, 0); 3482 Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) 3483 ? Register() 3484 : InputRegisterAt(instruction, 1); 3485 Location out_loc = locations->Out(); 3486 Register out = OutputRegister(instruction); 3487 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); 3488 DCHECK_LE(num_temps, 1u); 3489 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); 3490 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3491 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3492 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3493 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 3494 3495 vixl::aarch64::Label done, zero; 3496 SlowPathCodeARM64* slow_path = nullptr; 3497 3498 // Return 0 if `obj` is null. 3499 // Avoid null check if we know `obj` is not null. 3500 if (instruction->MustDoNullCheck()) { 3501 __ Cbz(obj, &zero); 3502 } 3503 3504 switch (type_check_kind) { 3505 case TypeCheckKind::kExactCheck: { 3506 ReadBarrierOption read_barrier_option = 3507 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3508 // /* HeapReference<Class> */ out = obj->klass_ 3509 GenerateReferenceLoadTwoRegisters(instruction, 3510 out_loc, 3511 obj_loc, 3512 class_offset, 3513 maybe_temp_loc, 3514 read_barrier_option); 3515 __ Cmp(out, cls); 3516 __ Cset(out, eq); 3517 if (zero.IsLinked()) { 3518 __ B(&done); 3519 } 3520 break; 3521 } 3522 3523 case TypeCheckKind::kAbstractClassCheck: { 3524 ReadBarrierOption read_barrier_option = 3525 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3526 // /* HeapReference<Class> */ out = obj->klass_ 3527 GenerateReferenceLoadTwoRegisters(instruction, 3528 out_loc, 3529 obj_loc, 3530 class_offset, 3531 maybe_temp_loc, 3532 read_barrier_option); 3533 // If the class is abstract, we eagerly fetch the super class of the 3534 // object to avoid doing a comparison we know will fail. 3535 vixl::aarch64::Label loop, success; 3536 __ Bind(&loop); 3537 // /* HeapReference<Class> */ out = out->super_class_ 3538 GenerateReferenceLoadOneRegister(instruction, 3539 out_loc, 3540 super_offset, 3541 maybe_temp_loc, 3542 read_barrier_option); 3543 // If `out` is null, we use it for the result, and jump to `done`. 3544 __ Cbz(out, &done); 3545 __ Cmp(out, cls); 3546 __ B(ne, &loop); 3547 __ Mov(out, 1); 3548 if (zero.IsLinked()) { 3549 __ B(&done); 3550 } 3551 break; 3552 } 3553 3554 case TypeCheckKind::kClassHierarchyCheck: { 3555 ReadBarrierOption read_barrier_option = 3556 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3557 // /* HeapReference<Class> */ out = obj->klass_ 3558 GenerateReferenceLoadTwoRegisters(instruction, 3559 out_loc, 3560 obj_loc, 3561 class_offset, 3562 maybe_temp_loc, 3563 read_barrier_option); 3564 // Walk over the class hierarchy to find a match. 
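// A rough sketch of the loop emitted below:
//   loop: cmp  out, cls
//         b.eq success
//         out = out->super_class_   // reference load, possibly via a read barrier
//         cbnz out, loop
// Reaching the end of the loop with a null `out` doubles as the `false` (0) result.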
3565 vixl::aarch64::Label loop, success; 3566 __ Bind(&loop); 3567 __ Cmp(out, cls); 3568 __ B(eq, &success); 3569 // /* HeapReference<Class> */ out = out->super_class_ 3570 GenerateReferenceLoadOneRegister(instruction, 3571 out_loc, 3572 super_offset, 3573 maybe_temp_loc, 3574 read_barrier_option); 3575 __ Cbnz(out, &loop); 3576 // If `out` is null, we use it for the result, and jump to `done`. 3577 __ B(&done); 3578 __ Bind(&success); 3579 __ Mov(out, 1); 3580 if (zero.IsLinked()) { 3581 __ B(&done); 3582 } 3583 break; 3584 } 3585 3586 case TypeCheckKind::kArrayObjectCheck: { 3587 ReadBarrierOption read_barrier_option = 3588 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3589 // /* HeapReference<Class> */ out = obj->klass_ 3590 GenerateReferenceLoadTwoRegisters(instruction, 3591 out_loc, 3592 obj_loc, 3593 class_offset, 3594 maybe_temp_loc, 3595 read_barrier_option); 3596 // Do an exact check. 3597 vixl::aarch64::Label exact_check; 3598 __ Cmp(out, cls); 3599 __ B(eq, &exact_check); 3600 // Otherwise, we need to check that the object's class is a non-primitive array. 3601 // /* HeapReference<Class> */ out = out->component_type_ 3602 GenerateReferenceLoadOneRegister(instruction, 3603 out_loc, 3604 component_offset, 3605 maybe_temp_loc, 3606 read_barrier_option); 3607 // If `out` is null, we use it for the result, and jump to `done`. 3608 __ Cbz(out, &done); 3609 __ Ldrh(out, HeapOperand(out, primitive_offset)); 3610 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 3611 __ Cbnz(out, &zero); 3612 __ Bind(&exact_check); 3613 __ Mov(out, 1); 3614 __ B(&done); 3615 break; 3616 } 3617 3618 case TypeCheckKind::kArrayCheck: { 3619 // No read barrier since the slow path will retry upon failure. 3620 // /* HeapReference<Class> */ out = obj->klass_ 3621 GenerateReferenceLoadTwoRegisters(instruction, 3622 out_loc, 3623 obj_loc, 3624 class_offset, 3625 maybe_temp_loc, 3626 kWithoutReadBarrier); 3627 __ Cmp(out, cls); 3628 DCHECK(locations->OnlyCallsOnSlowPath()); 3629 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 3630 instruction, /* is_fatal= */ false); 3631 codegen_->AddSlowPath(slow_path); 3632 __ B(ne, slow_path->GetEntryLabel()); 3633 __ Mov(out, 1); 3634 if (zero.IsLinked()) { 3635 __ B(&done); 3636 } 3637 break; 3638 } 3639 3640 case TypeCheckKind::kUnresolvedCheck: 3641 case TypeCheckKind::kInterfaceCheck: { 3642 // Note that we indeed only call on slow path, but we always go 3643 // into the slow path for the unresolved and interface check 3644 // cases. 3645 // 3646 // We cannot directly call the InstanceofNonTrivial runtime 3647 // entry point without resorting to a type checking slow path 3648 // here (i.e. by calling InvokeRuntime directly), as it would 3649 // require to assign fixed registers for the inputs of this 3650 // HInstanceOf instruction (following the runtime calling 3651 // convention), which might be cluttered by the potential first 3652 // read barrier emission at the beginning of this method. 3653 // 3654 // TODO: Introduce a new runtime entry point taking the object 3655 // to test (instead of its class) as argument, and let it deal 3656 // with the read barrier issues. This will let us refactor this 3657 // case of the `switch` code as it was previously (with a direct 3658 // call to the runtime not using a type checking slow path). 3659 // This should also be beneficial for the other cases above. 
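// In both of these cases the slow path performs the entire check by calling into the runtime
// and writes the result into `out`; the only inline code emitted below is the unconditional
// branch to the slow path.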
3660 DCHECK(locations->OnlyCallsOnSlowPath()); 3661 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 3662 instruction, /* is_fatal= */ false); 3663 codegen_->AddSlowPath(slow_path); 3664 __ B(slow_path->GetEntryLabel()); 3665 if (zero.IsLinked()) { 3666 __ B(&done); 3667 } 3668 break; 3669 } 3670 3671 case TypeCheckKind::kBitstringCheck: { 3672 // /* HeapReference<Class> */ temp = obj->klass_ 3673 GenerateReferenceLoadTwoRegisters(instruction, 3674 out_loc, 3675 obj_loc, 3676 class_offset, 3677 maybe_temp_loc, 3678 kWithoutReadBarrier); 3679 3680 GenerateBitstringTypeCheckCompare(instruction, out); 3681 __ Cset(out, eq); 3682 if (zero.IsLinked()) { 3683 __ B(&done); 3684 } 3685 break; 3686 } 3687 } 3688 3689 if (zero.IsLinked()) { 3690 __ Bind(&zero); 3691 __ Mov(out, 0); 3692 } 3693 3694 if (done.IsLinked()) { 3695 __ Bind(&done); 3696 } 3697 3698 if (slow_path != nullptr) { 3699 __ Bind(slow_path->GetExitLabel()); 3700 } 3701 } 3702 3703 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { 3704 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3705 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); 3706 LocationSummary* locations = 3707 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 3708 locations->SetInAt(0, Location::RequiresRegister()); 3709 if (type_check_kind == TypeCheckKind::kBitstringCheck) { 3710 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 3711 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 3712 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 3713 } else { 3714 locations->SetInAt(1, Location::RequiresRegister()); 3715 } 3716 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 3717 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); 3718 } 3719 3720 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { 3721 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3722 LocationSummary* locations = instruction->GetLocations(); 3723 Location obj_loc = locations->InAt(0); 3724 Register obj = InputRegisterAt(instruction, 0); 3725 Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) 3726 ? Register() 3727 : InputRegisterAt(instruction, 1); 3728 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); 3729 DCHECK_GE(num_temps, 1u); 3730 DCHECK_LE(num_temps, 3u); 3731 Location temp_loc = locations->GetTemp(0); 3732 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); 3733 Location maybe_temp3_loc = (num_temps >= 3) ? 
locations->GetTemp(2) : Location::NoLocation(); 3734 Register temp = WRegisterFrom(temp_loc); 3735 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3736 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3737 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3738 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 3739 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 3740 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 3741 const uint32_t object_array_data_offset = 3742 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 3743 3744 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); 3745 SlowPathCodeARM64* type_check_slow_path = 3746 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 3747 instruction, is_type_check_slow_path_fatal); 3748 codegen_->AddSlowPath(type_check_slow_path); 3749 3750 vixl::aarch64::Label done; 3751 // Avoid null check if we know obj is not null. 3752 if (instruction->MustDoNullCheck()) { 3753 __ Cbz(obj, &done); 3754 } 3755 3756 switch (type_check_kind) { 3757 case TypeCheckKind::kExactCheck: 3758 case TypeCheckKind::kArrayCheck: { 3759 // /* HeapReference<Class> */ temp = obj->klass_ 3760 GenerateReferenceLoadTwoRegisters(instruction, 3761 temp_loc, 3762 obj_loc, 3763 class_offset, 3764 maybe_temp2_loc, 3765 kWithoutReadBarrier); 3766 3767 __ Cmp(temp, cls); 3768 // Jump to slow path for throwing the exception or doing a 3769 // more involved array check. 3770 __ B(ne, type_check_slow_path->GetEntryLabel()); 3771 break; 3772 } 3773 3774 case TypeCheckKind::kAbstractClassCheck: { 3775 // /* HeapReference<Class> */ temp = obj->klass_ 3776 GenerateReferenceLoadTwoRegisters(instruction, 3777 temp_loc, 3778 obj_loc, 3779 class_offset, 3780 maybe_temp2_loc, 3781 kWithoutReadBarrier); 3782 3783 // If the class is abstract, we eagerly fetch the super class of the 3784 // object to avoid doing a comparison we know will fail. 3785 vixl::aarch64::Label loop; 3786 __ Bind(&loop); 3787 // /* HeapReference<Class> */ temp = temp->super_class_ 3788 GenerateReferenceLoadOneRegister(instruction, 3789 temp_loc, 3790 super_offset, 3791 maybe_temp2_loc, 3792 kWithoutReadBarrier); 3793 3794 // If the class reference currently in `temp` is null, jump to the slow path to throw the 3795 // exception. 3796 __ Cbz(temp, type_check_slow_path->GetEntryLabel()); 3797 // Otherwise, compare classes. 3798 __ Cmp(temp, cls); 3799 __ B(ne, &loop); 3800 break; 3801 } 3802 3803 case TypeCheckKind::kClassHierarchyCheck: { 3804 // /* HeapReference<Class> */ temp = obj->klass_ 3805 GenerateReferenceLoadTwoRegisters(instruction, 3806 temp_loc, 3807 obj_loc, 3808 class_offset, 3809 maybe_temp2_loc, 3810 kWithoutReadBarrier); 3811 3812 // Walk over the class hierarchy to find a match. 3813 vixl::aarch64::Label loop; 3814 __ Bind(&loop); 3815 __ Cmp(temp, cls); 3816 __ B(eq, &done); 3817 3818 // /* HeapReference<Class> */ temp = temp->super_class_ 3819 GenerateReferenceLoadOneRegister(instruction, 3820 temp_loc, 3821 super_offset, 3822 maybe_temp2_loc, 3823 kWithoutReadBarrier); 3824 3825 // If the class reference currently in `temp` is not null, jump 3826 // back at the beginning of the loop. 3827 __ Cbnz(temp, &loop); 3828 // Otherwise, jump to the slow path to throw the exception. 
3829 __ B(type_check_slow_path->GetEntryLabel()); 3830 break; 3831 } 3832 3833 case TypeCheckKind::kArrayObjectCheck: { 3834 // /* HeapReference<Class> */ temp = obj->klass_ 3835 GenerateReferenceLoadTwoRegisters(instruction, 3836 temp_loc, 3837 obj_loc, 3838 class_offset, 3839 maybe_temp2_loc, 3840 kWithoutReadBarrier); 3841 3842 // Do an exact check. 3843 __ Cmp(temp, cls); 3844 __ B(eq, &done); 3845 3846 // Otherwise, we need to check that the object's class is a non-primitive array. 3847 // /* HeapReference<Class> */ temp = temp->component_type_ 3848 GenerateReferenceLoadOneRegister(instruction, 3849 temp_loc, 3850 component_offset, 3851 maybe_temp2_loc, 3852 kWithoutReadBarrier); 3853 3854 // If the component type is null, jump to the slow path to throw the exception. 3855 __ Cbz(temp, type_check_slow_path->GetEntryLabel()); 3856 // Otherwise, the object is indeed an array. Further check that this component type is not a 3857 // primitive type. 3858 __ Ldrh(temp, HeapOperand(temp, primitive_offset)); 3859 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 3860 __ Cbnz(temp, type_check_slow_path->GetEntryLabel()); 3861 break; 3862 } 3863 3864 case TypeCheckKind::kUnresolvedCheck: 3865 // We always go into the type check slow path for the unresolved check cases. 3866 // 3867 // We cannot directly call the CheckCast runtime entry point 3868 // without resorting to a type checking slow path here (i.e. by 3869 // calling InvokeRuntime directly), as it would require to 3870 // assign fixed registers for the inputs of this HInstanceOf 3871 // instruction (following the runtime calling convention), which 3872 // might be cluttered by the potential first read barrier 3873 // emission at the beginning of this method. 3874 __ B(type_check_slow_path->GetEntryLabel()); 3875 break; 3876 case TypeCheckKind::kInterfaceCheck: { 3877 // /* HeapReference<Class> */ temp = obj->klass_ 3878 GenerateReferenceLoadTwoRegisters(instruction, 3879 temp_loc, 3880 obj_loc, 3881 class_offset, 3882 maybe_temp2_loc, 3883 kWithoutReadBarrier); 3884 3885 // /* HeapReference<Class> */ temp = temp->iftable_ 3886 GenerateReferenceLoadTwoRegisters(instruction, 3887 temp_loc, 3888 temp_loc, 3889 iftable_offset, 3890 maybe_temp2_loc, 3891 kWithoutReadBarrier); 3892 // Iftable is never null. 3893 __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset)); 3894 // Loop through the iftable and check if any class matches. 3895 vixl::aarch64::Label start_loop; 3896 __ Bind(&start_loop); 3897 __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel()); 3898 __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset)); 3899 GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc)); 3900 // Go to next interface. 3901 __ Add(temp, temp, 2 * kHeapReferenceSize); 3902 __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2); 3903 // Compare the classes and continue the loop if they do not match. 
3904 __ Cmp(cls, WRegisterFrom(maybe_temp3_loc)); 3905 __ B(ne, &start_loop); 3906 break; 3907 } 3908 3909 case TypeCheckKind::kBitstringCheck: { 3910 // /* HeapReference<Class> */ temp = obj->klass_ 3911 GenerateReferenceLoadTwoRegisters(instruction, 3912 temp_loc, 3913 obj_loc, 3914 class_offset, 3915 maybe_temp2_loc, 3916 kWithoutReadBarrier); 3917 3918 GenerateBitstringTypeCheckCompare(instruction, temp); 3919 __ B(ne, type_check_slow_path->GetEntryLabel()); 3920 break; 3921 } 3922 } 3923 __ Bind(&done); 3924 3925 __ Bind(type_check_slow_path->GetExitLabel()); 3926 } 3927 3928 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { 3929 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); 3930 locations->SetOut(Location::ConstantLocation(constant)); 3931 } 3932 3933 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 3934 // Will be generated at use site. 3935 } 3936 3937 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { 3938 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); 3939 locations->SetOut(Location::ConstantLocation(constant)); 3940 } 3941 3942 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 3943 // Will be generated at use site. 3944 } 3945 3946 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 3947 // The trampoline uses the same calling convention as dex calling conventions, 3948 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 3949 // the method_idx. 3950 HandleInvoke(invoke); 3951 } 3952 3953 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 3954 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 3955 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 3956 } 3957 3958 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { 3959 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; 3960 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 3961 } 3962 3963 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { 3964 HandleInvoke(invoke); 3965 } 3966 3967 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { 3968 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 3969 LocationSummary* locations = invoke->GetLocations(); 3970 Register temp = XRegisterFrom(locations->GetTemp(0)); 3971 Location receiver = locations->InAt(0); 3972 Offset class_offset = mirror::Object::ClassOffset(); 3973 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); 3974 3975 // The register ip1 is required to be used for the hidden argument in 3976 // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. 3977 MacroAssembler* masm = GetVIXLAssembler(); 3978 UseScratchRegisterScope scratch_scope(masm); 3979 scratch_scope.Exclude(ip1); 3980 __ Mov(ip1, invoke->GetDexMethodIndex()); 3981 3982 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
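// (A literal pool emitted between the class load and MaybeRecordImplicitNullCheck() would move
// the recorded code offset away from the potentially faulting LDR, breaking the mapping that
// the implicit null check relies on.)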
3983 if (receiver.IsStackSlot()) { 3984 __ Ldr(temp.W(), StackOperandFrom(receiver)); 3985 { 3986 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 3987 // /* HeapReference<Class> */ temp = temp->klass_ 3988 __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); 3989 codegen_->MaybeRecordImplicitNullCheck(invoke); 3990 } 3991 } else { 3992 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 3993 // /* HeapReference<Class> */ temp = receiver->klass_ 3994 __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); 3995 codegen_->MaybeRecordImplicitNullCheck(invoke); 3996 } 3997 3998 // Instead of simply (possibly) unpoisoning `temp` here, we should 3999 // emit a read barrier for the previous class reference load. 4000 // However this is not required in practice, as this is an 4001 // intermediate/temporary reference and because the current 4002 // concurrent copying collector keeps the from-space memory 4003 // intact/accessible until the end of the marking phase (the 4004 // concurrent copying collector may not in the future). 4005 GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); 4006 __ Ldr(temp, 4007 MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); 4008 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 4009 invoke->GetImtIndex(), kArm64PointerSize)); 4010 // temp = temp->GetImtEntryAt(method_offset); 4011 __ Ldr(temp, MemOperand(temp, method_offset)); 4012 // lr = temp->GetEntryPoint(); 4013 __ Ldr(lr, MemOperand(temp, entry_point.Int32Value())); 4014 4015 { 4016 // Ensure the pc position is recorded immediately after the `blr` instruction. 4017 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 4018 4019 // lr(); 4020 __ blr(lr); 4021 DCHECK(!codegen_->IsLeafMethod()); 4022 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 4023 } 4024 4025 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4026 } 4027 4028 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 4029 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_); 4030 if (intrinsic.TryDispatch(invoke)) { 4031 return; 4032 } 4033 4034 HandleInvoke(invoke); 4035 } 4036 4037 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 4038 // Explicit clinit checks triggered by static invokes must have been pruned by 4039 // art::PrepareForRegisterAllocation. 4040 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 4041 4042 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_); 4043 if (intrinsic.TryDispatch(invoke)) { 4044 return; 4045 } 4046 4047 HandleInvoke(invoke); 4048 } 4049 4050 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) { 4051 if (invoke->GetLocations()->Intrinsified()) { 4052 IntrinsicCodeGeneratorARM64 intrinsic(codegen); 4053 intrinsic.Dispatch(invoke); 4054 return true; 4055 } 4056 return false; 4057 } 4058 4059 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( 4060 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 4061 ArtMethod* method ATTRIBUTE_UNUSED) { 4062 // On ARM64 we support all dispatch types. 
4063 return desired_dispatch_info; 4064 } 4065 4066 void CodeGeneratorARM64::GenerateStaticOrDirectCall( 4067 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { 4068 // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. 4069 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 4070 switch (invoke->GetMethodLoadKind()) { 4071 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { 4072 uint32_t offset = 4073 GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); 4074 // temp = thread->string_init_entrypoint 4075 __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset)); 4076 break; 4077 } 4078 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: 4079 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); 4080 break; 4081 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { 4082 DCHECK(GetCompilerOptions().IsBootImage()); 4083 // Add ADRP with its PC-relative method patch. 4084 vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod()); 4085 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4086 // Add ADD with its PC-relative method patch. 4087 vixl::aarch64::Label* add_label = 4088 NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label); 4089 EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); 4090 break; 4091 } 4092 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { 4093 // Add ADRP with its PC-relative .data.bimg.rel.ro patch. 4094 uint32_t boot_image_offset = GetBootImageOffset(invoke); 4095 vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset); 4096 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4097 // Add LDR with its PC-relative .data.bimg.rel.ro patch. 4098 vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label); 4099 // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. 4100 EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp)); 4101 break; 4102 } 4103 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { 4104 // Add ADRP with its PC-relative .bss entry patch. 4105 MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); 4106 vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); 4107 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4108 // Add LDR with its PC-relative .bss entry patch. 4109 vixl::aarch64::Label* ldr_label = 4110 NewMethodBssEntryPatch(target_method, adrp_label); 4111 EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); 4112 break; 4113 } 4114 case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: 4115 // Load method address from literal pool. 4116 __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); 4117 break; 4118 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { 4119 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); 4120 return; // No code pointer retrieval; the runtime performs the call directly. 4121 } 4122 } 4123 4124 switch (invoke->GetCodePtrLocation()) { 4125 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: 4126 { 4127 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 
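// (The ExactAssemblyScope restricts emission to exactly one instruction, so no literal pool or
// veneer can be placed between the branch and the RecordPcInfo() call that records its return
// address.)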
4128 ExactAssemblyScope eas(GetVIXLAssembler(), 4129 kInstructionSize, 4130 CodeBufferCheckScope::kExactSize); 4131 __ bl(&frame_entry_label_); 4132 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4133 } 4134 break; 4135 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: 4136 // LR = callee_method->entry_point_from_quick_compiled_code_; 4137 __ Ldr(lr, MemOperand( 4138 XRegisterFrom(callee_method), 4139 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value())); 4140 { 4141 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 4142 ExactAssemblyScope eas(GetVIXLAssembler(), 4143 kInstructionSize, 4144 CodeBufferCheckScope::kExactSize); 4145 // lr() 4146 __ blr(lr); 4147 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4148 } 4149 break; 4150 } 4151 4152 DCHECK(!IsLeafMethod()); 4153 } 4154 4155 void CodeGeneratorARM64::GenerateVirtualCall( 4156 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { 4157 // Use the calling convention instead of the location of the receiver, as 4158 // intrinsics may have put the receiver in a different register. In the intrinsics 4159 // slow path, the arguments have been moved to the right place, so here we are 4160 // guaranteed that the receiver is the first register of the calling convention. 4161 InvokeDexCallingConvention calling_convention; 4162 Register receiver = calling_convention.GetRegisterAt(0); 4163 Register temp = XRegisterFrom(temp_in); 4164 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 4165 invoke->GetVTableIndex(), kArm64PointerSize).SizeValue(); 4166 Offset class_offset = mirror::Object::ClassOffset(); 4167 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); 4168 4169 DCHECK(receiver.IsRegister()); 4170 4171 { 4172 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 4173 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 4174 // /* HeapReference<Class> */ temp = receiver->klass_ 4175 __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset)); 4176 MaybeRecordImplicitNullCheck(invoke); 4177 } 4178 // Instead of simply (possibly) unpoisoning `temp` here, we should 4179 // emit a read barrier for the previous class reference load. // However this is not required in practice, as this is an 4180 // intermediate/temporary reference and because the current 4181 // concurrent copying collector keeps the from-space memory 4182 // intact/accessible until the end of the marking phase (the 4183 // concurrent copying collector may not in the future). 4184 GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); 4185 // temp = temp->GetMethodAt(method_offset); 4186 __ Ldr(temp, MemOperand(temp, method_offset)); 4187 // lr = temp->GetEntryPoint(); 4188 __ Ldr(lr, MemOperand(temp, entry_point.SizeValue())); 4189 { 4190 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 
4191 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 4192 // lr(); 4193 __ blr(lr); 4194 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4195 } 4196 } 4197 4198 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 4199 HandleInvoke(invoke); 4200 } 4201 4202 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 4203 codegen_->GenerateInvokePolymorphicCall(invoke); 4204 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4205 } 4206 4207 void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) { 4208 HandleInvoke(invoke); 4209 } 4210 4211 void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) { 4212 codegen_->GenerateInvokeCustomCall(invoke); 4213 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4214 } 4215 4216 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch( 4217 uint32_t intrinsic_data, 4218 vixl::aarch64::Label* adrp_label) { 4219 return NewPcRelativePatch( 4220 /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_intrinsic_patches_); 4221 } 4222 4223 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( 4224 uint32_t boot_image_offset, 4225 vixl::aarch64::Label* adrp_label) { 4226 return NewPcRelativePatch( 4227 /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_); 4228 } 4229 4230 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( 4231 MethodReference target_method, 4232 vixl::aarch64::Label* adrp_label) { 4233 return NewPcRelativePatch( 4234 target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_); 4235 } 4236 4237 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch( 4238 MethodReference target_method, 4239 vixl::aarch64::Label* adrp_label) { 4240 return NewPcRelativePatch( 4241 target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_); 4242 } 4243 4244 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch( 4245 const DexFile& dex_file, 4246 dex::TypeIndex type_index, 4247 vixl::aarch64::Label* adrp_label) { 4248 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_); 4249 } 4250 4251 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch( 4252 const DexFile& dex_file, 4253 dex::TypeIndex type_index, 4254 vixl::aarch64::Label* adrp_label) { 4255 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); 4256 } 4257 4258 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch( 4259 const DexFile& dex_file, 4260 dex::StringIndex string_index, 4261 vixl::aarch64::Label* adrp_label) { 4262 return NewPcRelativePatch( 4263 &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_); 4264 } 4265 4266 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( 4267 const DexFile& dex_file, 4268 dex::StringIndex string_index, 4269 vixl::aarch64::Label* adrp_label) { 4270 return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); 4271 } 4272 4273 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) { 4274 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. 
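// The emitted CBNZ tests the marking register (mr) and branches to the Baker read barrier slow
// path when the GC is currently marking. For JIT code the target is the per-method slow path
// entry; for AOT code a placeholder branch is emitted and the actual thunk target is resolved
// at link time via `custom_data`.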
4275 if (Runtime::Current()->UseJitCompilation()) { 4276 auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data); 4277 vixl::aarch64::Label* slow_path_entry = &it->second.label; 4278 __ cbnz(mr, slow_path_entry); 4279 } else { 4280 baker_read_barrier_patches_.emplace_back(custom_data); 4281 vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label; 4282 __ bind(cbnz_label); 4283 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. 4284 } 4285 } 4286 4287 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( 4288 const DexFile* dex_file, 4289 uint32_t offset_or_index, 4290 vixl::aarch64::Label* adrp_label, 4291 ArenaDeque<PcRelativePatchInfo>* patches) { 4292 // Add a patch entry and return the label. 4293 patches->emplace_back(dex_file, offset_or_index); 4294 PcRelativePatchInfo* info = &patches->back(); 4295 vixl::aarch64::Label* label = &info->label; 4296 // If adrp_label is null, this is the ADRP patch and needs to point to its own label. 4297 info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label; 4298 return label; 4299 } 4300 4301 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( 4302 uint64_t address) { 4303 return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); 4304 } 4305 4306 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( 4307 const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { 4308 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); 4309 return jit_string_patches_.GetOrCreate( 4310 StringReference(&dex_file, string_index), 4311 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); 4312 } 4313 4314 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( 4315 const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { 4316 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); 4317 return jit_class_patches_.GetOrCreate( 4318 TypeReference(&dex_file, type_index), 4319 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); 4320 } 4321 4322 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, 4323 vixl::aarch64::Register reg) { 4324 DCHECK(reg.IsX()); 4325 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4326 __ Bind(fixup_label); 4327 __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0)); 4328 } 4329 4330 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, 4331 vixl::aarch64::Register out, 4332 vixl::aarch64::Register base) { 4333 DCHECK(out.IsX()); 4334 DCHECK(base.IsX()); 4335 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4336 __ Bind(fixup_label); 4337 __ add(out, base, Operand(/* offset placeholder */ 0)); 4338 } 4339 4340 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, 4341 vixl::aarch64::Register out, 4342 vixl::aarch64::Register base) { 4343 DCHECK(base.IsX()); 4344 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4345 __ Bind(fixup_label); 4346 __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); 4347 } 4348 4349 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg, 4350 uint32_t boot_image_reference) { 4351 if (GetCompilerOptions().IsBootImage()) { 4352 // Add ADRP with its PC-relative type patch. 
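    // Illustrative shape of the ADRP/ADD pair once the linker resolves the reference
    // (the symbol name is a stand-in):
    //   adrp reg, BootImageRef                // 4KiB page of the target
    //   add  reg, reg, #:lo12:BootImageRef    // offset within the page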
4353 vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference); 4354 EmitAdrpPlaceholder(adrp_label, reg.X()); 4355 // Add ADD with its PC-relative type patch. 4356 vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label); 4357 EmitAddPlaceholder(add_label, reg.X(), reg.X()); 4358 } else if (GetCompilerOptions().GetCompilePic()) { 4359 // Add ADRP with its PC-relative .data.bimg.rel.ro patch. 4360 vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference); 4361 EmitAdrpPlaceholder(adrp_label, reg.X()); 4362 // Add LDR with its PC-relative .data.bimg.rel.ro patch. 4363 vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label); 4364 EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X()); 4365 } else { 4366 DCHECK(Runtime::Current()->UseJitCompilation()); 4367 gc::Heap* heap = Runtime::Current()->GetHeap(); 4368 DCHECK(!heap->GetBootImageSpaces().empty()); 4369 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; 4370 __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address))); 4371 } 4372 } 4373 4374 void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, 4375 uint32_t boot_image_offset) { 4376 DCHECK(invoke->IsStatic()); 4377 InvokeRuntimeCallingConvention calling_convention; 4378 Register argument = calling_convention.GetRegisterAt(0); 4379 if (GetCompilerOptions().IsBootImage()) { 4380 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); 4381 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 4382 MethodReference target_method = invoke->GetTargetMethod(); 4383 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; 4384 // Add ADRP with its PC-relative type patch. 4385 vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx); 4386 EmitAdrpPlaceholder(adrp_label, argument.X()); 4387 // Add ADD with its PC-relative type patch. 4388 vixl::aarch64::Label* add_label = 4389 NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label); 4390 EmitAddPlaceholder(add_label, argument.X(), argument.X()); 4391 } else { 4392 LoadBootImageAddress(argument, boot_image_offset); 4393 } 4394 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); 4395 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 4396 } 4397 4398 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> 4399 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( 4400 const ArenaDeque<PcRelativePatchInfo>& infos, 4401 ArenaVector<linker::LinkerPatch>* linker_patches) { 4402 for (const PcRelativePatchInfo& info : infos) { 4403 linker_patches->push_back(Factory(info.label.GetLocation(), 4404 info.target_dex_file, 4405 info.pc_insn_label->GetLocation(), 4406 info.offset_or_index)); 4407 } 4408 } 4409 4410 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> 4411 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, 4412 const DexFile* target_dex_file, 4413 uint32_t pc_insn_offset, 4414 uint32_t boot_image_offset) { 4415 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
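  // This adapter merely drops the unused dex file argument, letting patch factories
  // that take no dex file (intrinsic references, boot image .data.bimg.rel.ro entries)
  // be used with EmitPcRelativeLinkerPatches() above.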
4416 return Factory(literal_offset, pc_insn_offset, boot_image_offset); 4417 } 4418 4419 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { 4420 DCHECK(linker_patches->empty()); 4421 size_t size = 4422 boot_image_method_patches_.size() + 4423 method_bss_entry_patches_.size() + 4424 boot_image_type_patches_.size() + 4425 type_bss_entry_patches_.size() + 4426 boot_image_string_patches_.size() + 4427 string_bss_entry_patches_.size() + 4428 boot_image_intrinsic_patches_.size() + 4429 baker_read_barrier_patches_.size(); 4430 linker_patches->reserve(size); 4431 if (GetCompilerOptions().IsBootImage()) { 4432 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( 4433 boot_image_method_patches_, linker_patches); 4434 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( 4435 boot_image_type_patches_, linker_patches); 4436 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( 4437 boot_image_string_patches_, linker_patches); 4438 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( 4439 boot_image_intrinsic_patches_, linker_patches); 4440 } else { 4441 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( 4442 boot_image_method_patches_, linker_patches); 4443 DCHECK(boot_image_type_patches_.empty()); 4444 DCHECK(boot_image_string_patches_.empty()); 4445 DCHECK(boot_image_intrinsic_patches_.empty()); 4446 } 4447 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( 4448 method_bss_entry_patches_, linker_patches); 4449 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( 4450 type_bss_entry_patches_, linker_patches); 4451 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( 4452 string_bss_entry_patches_, linker_patches); 4453 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { 4454 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch( 4455 info.label.GetLocation(), info.custom_data)); 4456 } 4457 DCHECK_EQ(size, linker_patches->size()); 4458 } 4459 4460 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const { 4461 return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || 4462 patch.GetType() == linker::LinkerPatch::Type::kCallRelative; 4463 } 4464 4465 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, 4466 /*out*/ ArenaVector<uint8_t>* code, 4467 /*out*/ std::string* debug_name) { 4468 Arm64Assembler assembler(GetGraph()->GetAllocator()); 4469 switch (patch.GetType()) { 4470 case linker::LinkerPatch::Type::kCallRelative: { 4471 // The thunk just uses the entry point in the ArtMethod. This works even for calls 4472 // to the generic JNI and interpreter trampolines. 4473 Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( 4474 kArm64PointerSize).Int32Value()); 4475 assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); 4476 if (GetCompilerOptions().GenerateAnyDebugInfo()) { 4477 *debug_name = "MethodCallThunk"; 4478 } 4479 break; 4480 } 4481 case linker::LinkerPatch::Type::kBakerReadBarrierBranch: { 4482 DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); 4483 CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); 4484 break; 4485 } 4486 default: 4487 LOG(FATAL) << "Unexpected patch type " << patch.GetType(); 4488 UNREACHABLE(); 4489 } 4490 4491 // Ensure we emit the literal pool if any. 
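  // FinalizeCode() flushes any pending literals into the buffer; the finished thunk
  // is then copied into `code` through the MemoryRegion below.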
4492 assembler.FinalizeCode(); 4493 code->resize(assembler.CodeSize()); 4494 MemoryRegion code_region(code->data(), code->size()); 4495 assembler.FinalizeInstructions(code_region); 4496 } 4497 4498 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { 4499 return uint32_literals_.GetOrCreate( 4500 value, 4501 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); }); 4502 } 4503 4504 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) { 4505 return uint64_literals_.GetOrCreate( 4506 value, 4507 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); }); 4508 } 4509 4510 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 4511 // Explicit clinit checks triggered by static invokes must have been pruned by 4512 // art::PrepareForRegisterAllocation. 4513 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 4514 4515 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 4516 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4517 return; 4518 } 4519 4520 { 4521 // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there 4522 // are no pools emitted. 4523 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 4524 LocationSummary* locations = invoke->GetLocations(); 4525 codegen_->GenerateStaticOrDirectCall( 4526 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 4527 } 4528 4529 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4530 } 4531 4532 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 4533 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 4534 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4535 return; 4536 } 4537 4538 { 4539 // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there 4540 // are no pools emitted. 
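    // A constant pool dumped in between would shift the PC recorded by RecordPcInfo()
    // away from the BLR's return address, so the runtime could not find the call's
    // stack map.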
4541 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 4542 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 4543 DCHECK(!codegen_->IsLeafMethod()); 4544 } 4545 4546 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4547 } 4548 4549 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( 4550 HLoadClass::LoadKind desired_class_load_kind) { 4551 switch (desired_class_load_kind) { 4552 case HLoadClass::LoadKind::kInvalid: 4553 LOG(FATAL) << "UNREACHABLE"; 4554 UNREACHABLE(); 4555 case HLoadClass::LoadKind::kReferrersClass: 4556 break; 4557 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 4558 case HLoadClass::LoadKind::kBootImageRelRo: 4559 case HLoadClass::LoadKind::kBssEntry: 4560 DCHECK(!Runtime::Current()->UseJitCompilation()); 4561 break; 4562 case HLoadClass::LoadKind::kJitBootImageAddress: 4563 case HLoadClass::LoadKind::kJitTableAddress: 4564 DCHECK(Runtime::Current()->UseJitCompilation()); 4565 break; 4566 case HLoadClass::LoadKind::kRuntimeCall: 4567 break; 4568 } 4569 return desired_class_load_kind; 4570 } 4571 4572 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { 4573 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 4574 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 4575 InvokeRuntimeCallingConvention calling_convention; 4576 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 4577 cls, 4578 LocationFrom(calling_convention.GetRegisterAt(0)), 4579 LocationFrom(vixl::aarch64::x0)); 4580 DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0)); 4581 return; 4582 } 4583 DCHECK(!cls->NeedsAccessCheck()); 4584 4585 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 4586 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 4587 ? LocationSummary::kCallOnSlowPath 4588 : LocationSummary::kNoCall; 4589 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); 4590 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 4591 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 4592 } 4593 4594 if (load_kind == HLoadClass::LoadKind::kReferrersClass) { 4595 locations->SetInAt(0, Location::RequiresRegister()); 4596 } 4597 locations->SetOut(Location::RequiresRegister()); 4598 if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { 4599 if (!kUseReadBarrier || kUseBakerReadBarrier) { 4600 // Rely on the type resolution or initialization and marking to save everything we need. 4601 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 4602 } else { 4603 // For non-Baker read barrier we have a temp-clobbering call. 4604 } 4605 } 4606 } 4607 4608 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 4609 // move. 4610 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { 4611 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 4612 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 4613 codegen_->GenerateLoadClassRuntimeCall(cls); 4614 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4615 return; 4616 } 4617 DCHECK(!cls->NeedsAccessCheck()); 4618 4619 Location out_loc = cls->GetLocations()->Out(); 4620 Register out = OutputRegister(cls); 4621 4622 const ReadBarrierOption read_barrier_option = cls->IsInBootImage() 4623 ? 
kWithoutReadBarrier 4624 : kCompilerReadBarrierOption; 4625 bool generate_null_check = false; 4626 switch (load_kind) { 4627 case HLoadClass::LoadKind::kReferrersClass: { 4628 DCHECK(!cls->CanCallRuntime()); 4629 DCHECK(!cls->MustGenerateClinitCheck()); 4630 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 4631 Register current_method = InputRegisterAt(cls, 0); 4632 codegen_->GenerateGcRootFieldLoad(cls, 4633 out_loc, 4634 current_method, 4635 ArtMethod::DeclaringClassOffset().Int32Value(), 4636 /* fixup_label= */ nullptr, 4637 read_barrier_option); 4638 break; 4639 } 4640 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { 4641 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 4642 // Add ADRP with its PC-relative type patch. 4643 const DexFile& dex_file = cls->GetDexFile(); 4644 dex::TypeIndex type_index = cls->GetTypeIndex(); 4645 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); 4646 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4647 // Add ADD with its PC-relative type patch. 4648 vixl::aarch64::Label* add_label = 4649 codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); 4650 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); 4651 break; 4652 } 4653 case HLoadClass::LoadKind::kBootImageRelRo: { 4654 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 4655 uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); 4656 // Add ADRP with its PC-relative .data.bimg.rel.ro patch. 4657 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); 4658 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4659 // Add LDR with its PC-relative .data.bimg.rel.ro patch. 4660 vixl::aarch64::Label* ldr_label = 4661 codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); 4662 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); 4663 break; 4664 } 4665 case HLoadClass::LoadKind::kBssEntry: { 4666 // Add ADRP with its PC-relative Class .bss entry patch. 4667 const DexFile& dex_file = cls->GetDexFile(); 4668 dex::TypeIndex type_index = cls->GetTypeIndex(); 4669 vixl::aarch64::Register temp = XRegisterFrom(out_loc); 4670 vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); 4671 codegen_->EmitAdrpPlaceholder(adrp_label, temp); 4672 // Add LDR with its PC-relative Class .bss entry patch. 
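      // The .bss slot is initially null; the Cbz emitted at the end of this visitor
      // then enters LoadClassSlowPathARM64, which resolves the class and fills the
      // slot so that later executions take the fast path.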
4673 vixl::aarch64::Label* ldr_label = 4674 codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); 4675 // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ 4676 codegen_->GenerateGcRootFieldLoad(cls, 4677 out_loc, 4678 temp, 4679 /* offset placeholder */ 0u, 4680 ldr_label, 4681 read_barrier_option); 4682 generate_null_check = true; 4683 break; 4684 } 4685 case HLoadClass::LoadKind::kJitBootImageAddress: { 4686 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 4687 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); 4688 DCHECK_NE(address, 0u); 4689 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); 4690 break; 4691 } 4692 case HLoadClass::LoadKind::kJitTableAddress: { 4693 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), 4694 cls->GetTypeIndex(), 4695 cls->GetClass())); 4696 codegen_->GenerateGcRootFieldLoad(cls, 4697 out_loc, 4698 out.X(), 4699 /* offset= */ 0, 4700 /* fixup_label= */ nullptr, 4701 read_barrier_option); 4702 break; 4703 } 4704 case HLoadClass::LoadKind::kRuntimeCall: 4705 case HLoadClass::LoadKind::kInvalid: 4706 LOG(FATAL) << "UNREACHABLE"; 4707 UNREACHABLE(); 4708 } 4709 4710 bool do_clinit = cls->MustGenerateClinitCheck(); 4711 if (generate_null_check || do_clinit) { 4712 DCHECK(cls->CanCallRuntime()); 4713 SlowPathCodeARM64* slow_path = 4714 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls); 4715 codegen_->AddSlowPath(slow_path); 4716 if (generate_null_check) { 4717 __ Cbz(out, slow_path->GetEntryLabel()); 4718 } 4719 if (cls->MustGenerateClinitCheck()) { 4720 GenerateClassInitializationCheck(slow_path, out); 4721 } else { 4722 __ Bind(slow_path->GetExitLabel()); 4723 } 4724 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4725 } 4726 } 4727 4728 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) { 4729 InvokeRuntimeCallingConvention calling_convention; 4730 Location location = LocationFrom(calling_convention.GetRegisterAt(0)); 4731 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); 4732 } 4733 4734 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) { 4735 codegen_->GenerateLoadMethodHandleRuntimeCall(load); 4736 } 4737 4738 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) { 4739 InvokeRuntimeCallingConvention calling_convention; 4740 Location location = LocationFrom(calling_convention.GetRegisterAt(0)); 4741 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); 4742 } 4743 4744 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) { 4745 codegen_->GenerateLoadMethodTypeRuntimeCall(load); 4746 } 4747 4748 static MemOperand GetExceptionTlsAddress() { 4749 return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); 4750 } 4751 4752 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) { 4753 LocationSummary* locations = 4754 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); 4755 locations->SetOut(Location::RequiresRegister()); 4756 } 4757 4758 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) { 4759 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress()); 4760 } 4761 4762 void LocationsBuilderARM64::VisitClearException(HClearException* clear) { 4763 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); 4764 } 4765 4766 
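// Clearing the pending exception is a single store of null (wzr) to the Thread's
// exception field, addressed off the reserved thread register (tr).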
void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 4767 __ Str(wzr, GetExceptionTlsAddress()); 4768 } 4769 4770 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( 4771 HLoadString::LoadKind desired_string_load_kind) { 4772 switch (desired_string_load_kind) { 4773 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 4774 case HLoadString::LoadKind::kBootImageRelRo: 4775 case HLoadString::LoadKind::kBssEntry: 4776 DCHECK(!Runtime::Current()->UseJitCompilation()); 4777 break; 4778 case HLoadString::LoadKind::kJitBootImageAddress: 4779 case HLoadString::LoadKind::kJitTableAddress: 4780 DCHECK(Runtime::Current()->UseJitCompilation()); 4781 break; 4782 case HLoadString::LoadKind::kRuntimeCall: 4783 break; 4784 } 4785 return desired_string_load_kind; 4786 } 4787 4788 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { 4789 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 4790 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); 4791 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { 4792 InvokeRuntimeCallingConvention calling_convention; 4793 locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); 4794 } else { 4795 locations->SetOut(Location::RequiresRegister()); 4796 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { 4797 if (!kUseReadBarrier || kUseBakerReadBarrier) { 4798 // Rely on the pResolveString and marking to save everything we need. 4799 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 4800 } else { 4801 // For non-Baker read barrier we have a temp-clobbering call. 4802 } 4803 } 4804 } 4805 } 4806 4807 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 4808 // move. 4809 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { 4810 Register out = OutputRegister(load); 4811 Location out_loc = load->GetLocations()->Out(); 4812 4813 switch (load->GetLoadKind()) { 4814 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { 4815 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 4816 // Add ADRP with its PC-relative String patch. 4817 const DexFile& dex_file = load->GetDexFile(); 4818 const dex::StringIndex string_index = load->GetStringIndex(); 4819 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); 4820 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4821 // Add ADD with its PC-relative String patch. 4822 vixl::aarch64::Label* add_label = 4823 codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); 4824 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); 4825 return; 4826 } 4827 case HLoadString::LoadKind::kBootImageRelRo: { 4828 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 4829 // Add ADRP with its PC-relative .data.bimg.rel.ro patch. 4830 uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); 4831 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); 4832 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4833 // Add LDR with its PC-relative .data.bimg.rel.ro patch. 
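      // The .data.bimg.rel.ro slot holds the 32-bit address of the boot image String
      // and is read-only after relocation, so a plain 32-bit LDR suffices and no read
      // barrier is needed.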
4834 vixl::aarch64::Label* ldr_label = 4835 codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); 4836 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); 4837 return; 4838 } 4839 case HLoadString::LoadKind::kBssEntry: { 4840 // Add ADRP with its PC-relative String .bss entry patch. 4841 const DexFile& dex_file = load->GetDexFile(); 4842 const dex::StringIndex string_index = load->GetStringIndex(); 4843 Register temp = XRegisterFrom(out_loc); 4844 vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); 4845 codegen_->EmitAdrpPlaceholder(adrp_label, temp); 4846 // Add LDR with its PC-relative String .bss entry patch. 4847 vixl::aarch64::Label* ldr_label = 4848 codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); 4849 // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ 4850 codegen_->GenerateGcRootFieldLoad(load, 4851 out_loc, 4852 temp, 4853 /* offset placeholder */ 0u, 4854 ldr_label, 4855 kCompilerReadBarrierOption); 4856 SlowPathCodeARM64* slow_path = 4857 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); 4858 codegen_->AddSlowPath(slow_path); 4859 __ Cbz(out.X(), slow_path->GetEntryLabel()); 4860 __ Bind(slow_path->GetExitLabel()); 4861 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4862 return; 4863 } 4864 case HLoadString::LoadKind::kJitBootImageAddress: { 4865 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); 4866 DCHECK_NE(address, 0u); 4867 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); 4868 return; 4869 } 4870 case HLoadString::LoadKind::kJitTableAddress: { 4871 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), 4872 load->GetStringIndex(), 4873 load->GetString())); 4874 codegen_->GenerateGcRootFieldLoad(load, 4875 out_loc, 4876 out.X(), 4877 /* offset= */ 0, 4878 /* fixup_label= */ nullptr, 4879 kCompilerReadBarrierOption); 4880 return; 4881 } 4882 default: 4883 break; 4884 } 4885 4886 // TODO: Re-add the compiler code to do string dex cache lookup again. 4887 InvokeRuntimeCallingConvention calling_convention; 4888 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); 4889 __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); 4890 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); 4891 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 4892 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4893 } 4894 4895 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { 4896 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); 4897 locations->SetOut(Location::ConstantLocation(constant)); 4898 } 4899 4900 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 4901 // Will be generated at use site. 4902 } 4903 4904 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 4905 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 4906 instruction, LocationSummary::kCallOnMainOnly); 4907 InvokeRuntimeCallingConvention calling_convention; 4908 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 4909 } 4910 4911 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 4912 codegen_->InvokeRuntime(instruction->IsEnter() ? 
kQuickLockObject : kQuickUnlockObject, 4913 instruction, 4914 instruction->GetDexPc()); 4915 if (instruction->IsEnter()) { 4916 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 4917 } else { 4918 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 4919 } 4920 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4921 } 4922 4923 void LocationsBuilderARM64::VisitMul(HMul* mul) { 4924 LocationSummary* locations = 4925 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); 4926 switch (mul->GetResultType()) { 4927 case DataType::Type::kInt32: 4928 case DataType::Type::kInt64: 4929 locations->SetInAt(0, Location::RequiresRegister()); 4930 locations->SetInAt(1, Location::RequiresRegister()); 4931 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4932 break; 4933 4934 case DataType::Type::kFloat32: 4935 case DataType::Type::kFloat64: 4936 locations->SetInAt(0, Location::RequiresFpuRegister()); 4937 locations->SetInAt(1, Location::RequiresFpuRegister()); 4938 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 4939 break; 4940 4941 default: 4942 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 4943 } 4944 } 4945 4946 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) { 4947 switch (mul->GetResultType()) { 4948 case DataType::Type::kInt32: 4949 case DataType::Type::kInt64: 4950 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1)); 4951 break; 4952 4953 case DataType::Type::kFloat32: 4954 case DataType::Type::kFloat64: 4955 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1)); 4956 break; 4957 4958 default: 4959 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 4960 } 4961 } 4962 4963 void LocationsBuilderARM64::VisitNeg(HNeg* neg) { 4964 LocationSummary* locations = 4965 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); 4966 switch (neg->GetResultType()) { 4967 case DataType::Type::kInt32: 4968 case DataType::Type::kInt64: 4969 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg)); 4970 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4971 break; 4972 4973 case DataType::Type::kFloat32: 4974 case DataType::Type::kFloat64: 4975 locations->SetInAt(0, Location::RequiresFpuRegister()); 4976 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 4977 break; 4978 4979 default: 4980 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 4981 } 4982 } 4983 4984 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) { 4985 switch (neg->GetResultType()) { 4986 case DataType::Type::kInt32: 4987 case DataType::Type::kInt64: 4988 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0)); 4989 break; 4990 4991 case DataType::Type::kFloat32: 4992 case DataType::Type::kFloat64: 4993 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0)); 4994 break; 4995 4996 default: 4997 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 4998 } 4999 } 5000 5001 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { 5002 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5003 instruction, LocationSummary::kCallOnMainOnly); 5004 InvokeRuntimeCallingConvention calling_convention; 5005 locations->SetOut(LocationFrom(x0)); 5006 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5007 locations->SetInAt(1, 
LocationFrom(calling_convention.GetRegisterAt(1))); 5008 } 5009 5010 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { 5011 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 5012 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); 5013 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); 5014 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); 5015 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5016 } 5017 5018 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { 5019 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5020 instruction, LocationSummary::kCallOnMainOnly); 5021 InvokeRuntimeCallingConvention calling_convention; 5022 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5023 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); 5024 } 5025 5026 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { 5027 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); 5028 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 5029 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5030 } 5031 5032 void LocationsBuilderARM64::VisitNot(HNot* instruction) { 5033 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5034 locations->SetInAt(0, Location::RequiresRegister()); 5035 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5036 } 5037 5038 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) { 5039 switch (instruction->GetResultType()) { 5040 case DataType::Type::kInt32: 5041 case DataType::Type::kInt64: 5042 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0)); 5043 break; 5044 5045 default: 5046 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); 5047 } 5048 } 5049 5050 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) { 5051 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5052 locations->SetInAt(0, Location::RequiresRegister()); 5053 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5054 } 5055 5056 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) { 5057 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1)); 5058 } 5059 5060 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) { 5061 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 5062 locations->SetInAt(0, Location::RequiresRegister()); 5063 } 5064 5065 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { 5066 if (CanMoveNullCheckToUser(instruction)) { 5067 return; 5068 } 5069 { 5070 // Ensure that between load and RecordPcInfo there are no pools emitted. 
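    // The implicit null check lets the LDR from offset 0 fault when `obj` is null; the
    // fault handler maps the faulting PC back to the stack map recorded below, which is
    // why no pool may appear between the load and RecordPcInfo().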
5071 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 5072 Location obj = instruction->GetLocations()->InAt(0); 5073 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); 5074 RecordPcInfo(instruction, instruction->GetDexPc()); 5075 } 5076 } 5077 5078 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) { 5079 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction); 5080 AddSlowPath(slow_path); 5081 5082 LocationSummary* locations = instruction->GetLocations(); 5083 Location obj = locations->InAt(0); 5084 5085 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); 5086 } 5087 5088 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { 5089 codegen_->GenerateNullCheck(instruction); 5090 } 5091 5092 void LocationsBuilderARM64::VisitOr(HOr* instruction) { 5093 HandleBinaryOp(instruction); 5094 } 5095 5096 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { 5097 HandleBinaryOp(instruction); 5098 } 5099 5100 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 5101 LOG(FATAL) << "Unreachable"; 5102 } 5103 5104 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) { 5105 if (instruction->GetNext()->IsSuspendCheck() && 5106 instruction->GetBlock()->GetLoopInformation() != nullptr) { 5107 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); 5108 // The back edge will generate the suspend check. 5109 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); 5110 } 5111 5112 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 5113 } 5114 5115 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { 5116 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5117 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 5118 if (location.IsStackSlot()) { 5119 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5120 } else if (location.IsDoubleStackSlot()) { 5121 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5122 } 5123 locations->SetOut(location); 5124 } 5125 5126 void InstructionCodeGeneratorARM64::VisitParameterValue( 5127 HParameterValue* instruction ATTRIBUTE_UNUSED) { 5128 // Nothing to do, the parameter is already at its location. 5129 } 5130 5131 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { 5132 LocationSummary* locations = 5133 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5134 locations->SetOut(LocationFrom(kArtMethodRegister)); 5135 } 5136 5137 void InstructionCodeGeneratorARM64::VisitCurrentMethod( 5138 HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 5139 // Nothing to do, the method is already at its location. 
}

void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
    locations->SetInAt(i, Location::Any());
  }
  locations->SetOut(Location::Any());
}

void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void LocationsBuilderARM64::VisitRem(HRem* rem) {
  DataType::Type type = rem->GetResultType();
  LocationSummary::CallKind call_kind =
      DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
                                          : LocationSummary::kNoCall;
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);

  switch (type) {
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;

    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64: {
      InvokeRuntimeCallingConvention calling_convention;
      locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
      locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
      locations->SetOut(calling_convention.GetReturnLocation(type));

      break;
    }

    default:
      LOG(FATAL) << "Unexpected rem type " << type;
  }
}

void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem* instruction) {
  int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
  DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);

  if (abs_imm == 2) {
    __ Cmp(dividend, 0);
    __ And(out, dividend, 1);
    __ Csneg(out, out, out, ge);
  } else {
    UseScratchRegisterScope temps(GetVIXLAssembler());
    Register temp = temps.AcquireSameSizeAs(out);

    __ Negs(temp, dividend);
    __ And(out, dividend, abs_imm - 1);
    __ And(temp, temp, abs_imm - 1);
    __ Csneg(out, out, temp, mi);
  }
}

void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem* instruction) {
  int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));

  if (imm == 0) {
    // Do not generate anything.
    // DivZeroCheck would prevent any code from being executed.
    return;
  }

  if (IsPowerOfTwo(AbsOrMin(imm))) {
    // Cases imm == -1 or imm == 1 are handled in constant folding by
    // InstructionWithAbsorbingInputSimplifier.
    // If the cases have survived till code generation they are handled in
    // GenerateIntRemForPower2Denom because -1 and 1 are powers of 2 (2^0).
    // The correct code is generated for them, just more instructions.
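    // For reference, the power-of-two path emits roughly the following for x % 8
    // (illustrative):
    //   negs  tmp, x
    //   and   out, x,   #7
    //   and   tmp, tmp, #7
    //   csneg out, out, tmp, mi   // out = (x > 0) ? (x & 7) : -((-x) & 7)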
5222 GenerateIntRemForPower2Denom(instruction); 5223 } else { 5224 DCHECK(imm < -2 || imm > 2) << imm; 5225 GenerateDivRemWithAnyConstant(instruction); 5226 } 5227 } 5228 5229 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) { 5230 DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) 5231 << instruction->GetResultType(); 5232 5233 if (instruction->GetLocations()->InAt(1).IsConstant()) { 5234 GenerateIntRemForConstDenom(instruction); 5235 } else { 5236 Register out = OutputRegister(instruction); 5237 Register dividend = InputRegisterAt(instruction, 0); 5238 Register divisor = InputRegisterAt(instruction, 1); 5239 UseScratchRegisterScope temps(GetVIXLAssembler()); 5240 Register temp = temps.AcquireSameSizeAs(out); 5241 __ Sdiv(temp, dividend, divisor); 5242 __ Msub(out, temp, divisor, dividend); 5243 } 5244 } 5245 5246 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { 5247 DataType::Type type = rem->GetResultType(); 5248 5249 switch (type) { 5250 case DataType::Type::kInt32: 5251 case DataType::Type::kInt64: { 5252 GenerateIntRem(rem); 5253 break; 5254 } 5255 5256 case DataType::Type::kFloat32: 5257 case DataType::Type::kFloat64: { 5258 QuickEntrypointEnum entrypoint = 5259 (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod; 5260 codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc()); 5261 if (type == DataType::Type::kFloat32) { 5262 CheckEntrypointTypes<kQuickFmodf, float, float, float>(); 5263 } else { 5264 CheckEntrypointTypes<kQuickFmod, double, double, double>(); 5265 } 5266 break; 5267 } 5268 5269 default: 5270 LOG(FATAL) << "Unexpected rem type " << type; 5271 UNREACHABLE(); 5272 } 5273 } 5274 5275 void LocationsBuilderARM64::VisitMin(HMin* min) { 5276 HandleBinaryOp(min); 5277 } 5278 5279 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { 5280 HandleBinaryOp(min); 5281 } 5282 5283 void LocationsBuilderARM64::VisitMax(HMax* max) { 5284 HandleBinaryOp(max); 5285 } 5286 5287 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { 5288 HandleBinaryOp(max); 5289 } 5290 5291 void LocationsBuilderARM64::VisitAbs(HAbs* abs) { 5292 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); 5293 switch (abs->GetResultType()) { 5294 case DataType::Type::kInt32: 5295 case DataType::Type::kInt64: 5296 locations->SetInAt(0, Location::RequiresRegister()); 5297 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5298 break; 5299 case DataType::Type::kFloat32: 5300 case DataType::Type::kFloat64: 5301 locations->SetInAt(0, Location::RequiresFpuRegister()); 5302 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5303 break; 5304 default: 5305 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); 5306 } 5307 } 5308 5309 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { 5310 switch (abs->GetResultType()) { 5311 case DataType::Type::kInt32: 5312 case DataType::Type::kInt64: { 5313 Register in_reg = InputRegisterAt(abs, 0); 5314 Register out_reg = OutputRegister(abs); 5315 __ Cmp(in_reg, Operand(0)); 5316 __ Cneg(out_reg, in_reg, lt); 5317 break; 5318 } 5319 case DataType::Type::kFloat32: 5320 case DataType::Type::kFloat64: { 5321 FPRegister in_reg = InputFPRegisterAt(abs, 0); 5322 FPRegister out_reg = OutputFPRegister(abs); 5323 __ Fabs(out_reg, in_reg); 5324 break; 5325 } 5326 default: 5327 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); 5328 } 5329 } 5330 5331 void 
LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { 5332 constructor_fence->SetLocations(nullptr); 5333 } 5334 5335 void InstructionCodeGeneratorARM64::VisitConstructorFence( 5336 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 5337 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 5338 } 5339 5340 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 5341 memory_barrier->SetLocations(nullptr); 5342 } 5343 5344 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 5345 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 5346 } 5347 5348 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { 5349 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5350 DataType::Type return_type = instruction->InputAt(0)->GetType(); 5351 locations->SetInAt(0, ARM64ReturnLocation(return_type)); 5352 } 5353 5354 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) { 5355 codegen_->GenerateFrameExit(); 5356 } 5357 5358 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { 5359 instruction->SetLocations(nullptr); 5360 } 5361 5362 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) { 5363 codegen_->GenerateFrameExit(); 5364 } 5365 5366 void LocationsBuilderARM64::VisitRor(HRor* ror) { 5367 HandleBinaryOp(ror); 5368 } 5369 5370 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) { 5371 HandleBinaryOp(ror); 5372 } 5373 5374 void LocationsBuilderARM64::VisitShl(HShl* shl) { 5375 HandleShift(shl); 5376 } 5377 5378 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) { 5379 HandleShift(shl); 5380 } 5381 5382 void LocationsBuilderARM64::VisitShr(HShr* shr) { 5383 HandleShift(shr); 5384 } 5385 5386 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) { 5387 HandleShift(shr); 5388 } 5389 5390 void LocationsBuilderARM64::VisitSub(HSub* instruction) { 5391 HandleBinaryOp(instruction); 5392 } 5393 5394 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) { 5395 HandleBinaryOp(instruction); 5396 } 5397 5398 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5399 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5400 } 5401 5402 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5403 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5404 } 5405 5406 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5407 HandleFieldSet(instruction); 5408 } 5409 5410 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5411 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 5412 } 5413 5414 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet( 5415 HUnresolvedInstanceFieldGet* instruction) { 5416 FieldAccessCallingConventionARM64 calling_convention; 5417 codegen_->CreateUnresolvedFieldLocationSummary( 5418 instruction, instruction->GetFieldType(), calling_convention); 5419 } 5420 5421 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet( 5422 HUnresolvedInstanceFieldGet* instruction) { 5423 FieldAccessCallingConventionARM64 calling_convention; 5424 codegen_->GenerateUnresolvedFieldAccess(instruction, 5425 instruction->GetFieldType(), 5426 instruction->GetFieldIndex(), 5427 instruction->GetDexPc(), 5428 calling_convention); 5429 } 5430 5431 void 
LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet( 5432 HUnresolvedInstanceFieldSet* instruction) { 5433 FieldAccessCallingConventionARM64 calling_convention; 5434 codegen_->CreateUnresolvedFieldLocationSummary( 5435 instruction, instruction->GetFieldType(), calling_convention); 5436 } 5437 5438 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet( 5439 HUnresolvedInstanceFieldSet* instruction) { 5440 FieldAccessCallingConventionARM64 calling_convention; 5441 codegen_->GenerateUnresolvedFieldAccess(instruction, 5442 instruction->GetFieldType(), 5443 instruction->GetFieldIndex(), 5444 instruction->GetDexPc(), 5445 calling_convention); 5446 } 5447 5448 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet( 5449 HUnresolvedStaticFieldGet* instruction) { 5450 FieldAccessCallingConventionARM64 calling_convention; 5451 codegen_->CreateUnresolvedFieldLocationSummary( 5452 instruction, instruction->GetFieldType(), calling_convention); 5453 } 5454 5455 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet( 5456 HUnresolvedStaticFieldGet* instruction) { 5457 FieldAccessCallingConventionARM64 calling_convention; 5458 codegen_->GenerateUnresolvedFieldAccess(instruction, 5459 instruction->GetFieldType(), 5460 instruction->GetFieldIndex(), 5461 instruction->GetDexPc(), 5462 calling_convention); 5463 } 5464 5465 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet( 5466 HUnresolvedStaticFieldSet* instruction) { 5467 FieldAccessCallingConventionARM64 calling_convention; 5468 codegen_->CreateUnresolvedFieldLocationSummary( 5469 instruction, instruction->GetFieldType(), calling_convention); 5470 } 5471 5472 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet( 5473 HUnresolvedStaticFieldSet* instruction) { 5474 FieldAccessCallingConventionARM64 calling_convention; 5475 codegen_->GenerateUnresolvedFieldAccess(instruction, 5476 instruction->GetFieldType(), 5477 instruction->GetFieldIndex(), 5478 instruction->GetDexPc(), 5479 calling_convention); 5480 } 5481 5482 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { 5483 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5484 instruction, LocationSummary::kCallOnSlowPath); 5485 // In suspend check slow path, usually there are no caller-save registers at all. 5486 // If SIMD instructions are present, however, we force spilling all live SIMD 5487 // registers in full width (since the runtime only saves/restores lower part). 5488 locations->SetCustomSlowPathCallerSaves( 5489 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); 5490 } 5491 5492 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) { 5493 HBasicBlock* block = instruction->GetBlock(); 5494 if (block->GetLoopInformation() != nullptr) { 5495 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 5496 // The back edge will generate the suspend check. 5497 return; 5498 } 5499 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 5500 // The goto will generate the suspend check. 
5501 return; 5502 } 5503 GenerateSuspendCheck(instruction, nullptr); 5504 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5505 } 5506 5507 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { 5508 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5509 instruction, LocationSummary::kCallOnMainOnly); 5510 InvokeRuntimeCallingConvention calling_convention; 5511 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5512 } 5513 5514 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { 5515 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 5516 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 5517 } 5518 5519 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { 5520 LocationSummary* locations = 5521 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall); 5522 DataType::Type input_type = conversion->GetInputType(); 5523 DataType::Type result_type = conversion->GetResultType(); 5524 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 5525 << input_type << " -> " << result_type; 5526 if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || 5527 (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { 5528 LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; 5529 } 5530 5531 if (DataType::IsFloatingPointType(input_type)) { 5532 locations->SetInAt(0, Location::RequiresFpuRegister()); 5533 } else { 5534 locations->SetInAt(0, Location::RequiresRegister()); 5535 } 5536 5537 if (DataType::IsFloatingPointType(result_type)) { 5538 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5539 } else { 5540 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5541 } 5542 } 5543 5544 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) { 5545 DataType::Type result_type = conversion->GetResultType(); 5546 DataType::Type input_type = conversion->GetInputType(); 5547 5548 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 5549 << input_type << " -> " << result_type; 5550 5551 if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { 5552 int result_size = DataType::Size(result_type); 5553 int input_size = DataType::Size(input_type); 5554 int min_size = std::min(result_size, input_size); 5555 Register output = OutputRegister(conversion); 5556 Register source = InputRegisterAt(conversion, 0); 5557 if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) { 5558 // 'int' values are used directly as W registers, discarding the top 5559 // bits, so we don't need to sign-extend and can just perform a move. 5560 // We do not pass the `kDiscardForSameWReg` argument to force clearing the 5561 // top 32 bits of the target register. We theoretically could leave those 5562 // bits unchanged, but we would have to make sure that no code uses a 5563 // 32bit input value as a 64bit value assuming that the top 32 bits are 5564 // zero. 5565 __ Mov(output.W(), source.W()); 5566 } else if (DataType::IsUnsignedType(result_type) || 5567 (DataType::IsUnsignedType(input_type) && input_size < result_size)) { 5568 __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte); 5569 } else { 5570 __ Sbfx(output, output.IsX() ? 
source.X() : source.W(), 0, min_size * kBitsPerByte); 5571 } 5572 } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { 5573 __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0)); 5574 } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { 5575 CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); 5576 __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0)); 5577 } else if (DataType::IsFloatingPointType(result_type) && 5578 DataType::IsFloatingPointType(input_type)) { 5579 __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0)); 5580 } else { 5581 LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type 5582 << " to " << result_type; 5583 } 5584 } 5585 5586 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) { 5587 HandleShift(ushr); 5588 } 5589 5590 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) { 5591 HandleShift(ushr); 5592 } 5593 5594 void LocationsBuilderARM64::VisitXor(HXor* instruction) { 5595 HandleBinaryOp(instruction); 5596 } 5597 5598 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) { 5599 HandleBinaryOp(instruction); 5600 } 5601 5602 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 5603 // Nothing to do, this should be removed during prepare for register allocator. 5604 LOG(FATAL) << "Unreachable"; 5605 } 5606 5607 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 5608 // Nothing to do, this should be removed during prepare for register allocator. 5609 LOG(FATAL) << "Unreachable"; 5610 } 5611 5612 // Simple implementation of packed switch - generate cascaded compare/jumps. 5613 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 5614 LocationSummary* locations = 5615 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); 5616 locations->SetInAt(0, Location::RequiresRegister()); 5617 } 5618 5619 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 5620 int32_t lower_bound = switch_instr->GetStartValue(); 5621 uint32_t num_entries = switch_instr->GetNumEntries(); 5622 Register value_reg = InputRegisterAt(switch_instr, 0); 5623 HBasicBlock* default_block = switch_instr->GetDefaultBlock(); 5624 5625 // Roughly set 16 as max average assemblies generated per HIR in a graph. 5626 static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize; 5627 // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to 5628 // make sure we don't emit it if the target may run out of range. 5629 // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR 5630 // ranges and emit the tables only as required. 5631 static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction; 5632 5633 if (num_entries <= kPackedSwitchCompareJumpThreshold || 5634 // Current instruction id is an upper bound of the number of HIRs in the graph. 5635 GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) { 5636 // Create a series of compare/jumps. 
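    // Illustrative shape of the cascade for four entries starting at lower_bound
    // (labels are stand-ins for the successor blocks):
    //   subs temp, value, #lower_bound
    //   b.eq case0
    //   subs temp, temp, #2
    //   b.lo case1        // temp was 1, i.e. value == lower_bound + 1
    //   b.eq case2        // temp was 2, i.e. value == lower_bound + 2
    //   cmp  temp, #1
    //   b.eq case3
    //   b    default      // unless the default block is the fall-through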
    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
    Register temp = temps.AcquireW();
    __ Subs(temp, value_reg, Operand(lower_bound));

    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
    // Jump to successors[0] if value == lower_bound.
    __ B(eq, codegen_->GetLabelOf(successors[0]));
    int32_t last_index = 0;
    for (; num_entries - last_index > 2; last_index += 2) {
      __ Subs(temp, temp, Operand(2));
      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
    }
    if (num_entries - last_index == 2) {
      // The last missing case_value.
      __ Cmp(temp, Operand(1));
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
    }

    // And the default for any other value.
    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
      __ B(codegen_->GetLabelOf(default_block));
    }
  } else {
    JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);

    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());

    // The instructions below should use at most one blocked register. Since there are two
    // blocked registers, we are free to block one.
    Register temp_w = temps.AcquireW();
    Register index;
    // Remove the bias.
    if (lower_bound != 0) {
      index = temp_w;
      __ Sub(index, value_reg, Operand(lower_bound));
    } else {
      index = value_reg;
    }

    // Jump to the default block if the index is out of range.
    __ Cmp(index, Operand(num_entries));
    __ B(hs, codegen_->GetLabelOf(default_block));

    // In the current VIXL implementation, Adr does not require any blocked registers to encode
    // its immediate value, so we are free to use both VIXL blocked registers to reduce the
    // register pressure.
    Register table_base = temps.AcquireX();
    // Load the jump offset from the table.
    __ Adr(table_base, jump_table->GetTableStartLabel());
    Register jump_offset = temp_w;
    __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));

    // Jump to the target block by branching to table_base (PC-relative) + offset.
    Register target_address = table_base;
    __ Add(target_address, table_base, Operand(jump_offset, SXTW));
    __ Br(target_address);
  }
}
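
// For illustration only (hypothetical registers and labels), a 4-entry switch with
// lower_bound == 10 taking the compare/jump path above emits roughly:
//   subs w16, w_value, #10
//   b.eq case_0
//   subs w16, w16, #2
//   b.lo case_1
//   b.eq case_2
//   cmp  w16, #1
//   b.eq case_3
//   b    default          // Unless the default block is the fallthrough successor.
// The jump table path instead emits a bounds check, an ADR of the table start, a load of the
// selected table entry and an indirect branch to table_base + offset; the SXTW in the final
// Add implies that each table entry is a signed 32-bit offset relative to the table start.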

void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  DataType::Type type = DataType::Type::kReference;
  Register out_reg = RegisterFrom(out, type);
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      out_reg,
                                                      offset,
                                                      maybe_temp,
                                                      /* needs_null_check= */ false,
                                                      /* use_load_acquire= */ false);
    } else {
      // Load with slow path based read barrier.
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      Register temp_reg = RegisterFrom(maybe_temp, type);
      __ Mov(temp_reg, out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ Ldr(out_reg, HeapOperand(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ Ldr(out_reg, HeapOperand(out_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  DataType::Type type = DataType::Type::kReference;
  Register out_reg = RegisterFrom(out, type);
  Register obj_reg = RegisterFrom(obj, type);
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      obj_reg,
                                                      offset,
                                                      maybe_temp,
                                                      /* needs_null_check= */ false,
                                                      /* use_load_acquire= */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}
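
// Note on the two helpers above: GenerateReferenceLoadOneRegister() loads through `out`
// itself, so its slow-path (non-Baker) read barrier must first save the original reference
// into `maybe_temp`, whereas GenerateReferenceLoadTwoRegisters() keeps the holder in `obj`
// and needs no such copy.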

void CodeGeneratorARM64::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    Register obj,
    uint32_t offset,
    vixl::aarch64::Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  DCHECK(fixup_label == nullptr || offset == 0u);
  Register root_reg = RegisterFrom(root, DataType::Type::kReference);
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used.

      // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
      // the Marking Register) to decide whether we need to enter
      // the slow path to mark the GC root.
      //
      // We use shared thunks for the slow path; shared within the method
      // for JIT, across methods for AOT. That thunk checks the reference
      // and jumps to the entrypoint if needed.
      //
      // lr = &return_address;
      // GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
      // if (mr) {  // Thread::Current()->GetIsGcMarking()
      //   goto gc_root_thunk<root_reg>(lr)
      // }
      // return_address:

      UseScratchRegisterScope temps(GetVIXLAssembler());
      DCHECK(temps.IsAvailable(ip0));
      DCHECK(temps.IsAvailable(ip1));
      temps.Exclude(ip0, ip1);
      uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());

      ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
      vixl::aarch64::Label return_address;
      __ adr(lr, &return_address);
      if (fixup_label != nullptr) {
        __ bind(fixup_label);
      }
      static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
                    "GC root LDR must be 2 instructions (8B) before the return address label.");
      __ ldr(root_reg, MemOperand(obj.X(), offset));
      EmitBakerReadBarrierCbnz(custom_data);
      __ bind(&return_address);
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = obj + offset
      if (fixup_label == nullptr) {
        __ Add(root_reg.X(), obj.X(), offset);
      } else {
        EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
      }
      // /* mirror::Object* */ root = root->Read()
      GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
    if (fixup_label == nullptr) {
      __ Ldr(root_reg, MemOperand(obj, offset));
    } else {
      EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
    }
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
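
// A sketch of the fast-path sequence emitted above (hypothetical registers; the conditional
// branch targets the per-root-register thunk selected via `custom_data`):
//   adr  lr, return_address
//   ldr  w0, [x1, #offset]        // Original GC root load.
//   cbnz mr, <gc_root_thunk>      // Only taken while the GC is marking.
//   return_address:
// The static_assert above guarantees the LDR is always exactly 8 bytes before
// `return_address`, which is how the thunk and entrypoint locate the original load from LR.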

void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier(
    vixl::aarch64::Register marked,
    vixl::aarch64::Register old_value) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
  uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode());

  ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
  vixl::aarch64::Label return_address;
  __ adr(lr, &return_address);
  static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
                "GC root LDR must be 2 instructions (8B) before the return address label.");
  __ mov(marked, old_value);
  EmitBakerReadBarrierCbnz(custom_data);
  __ bind(&return_address);
}

void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               vixl::aarch64::Register obj,
                                                               const vixl::aarch64::MemOperand& src,
                                                               bool needs_null_check,
                                                               bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
  // Marking Register) to decide whether we need to enter the slow
  // path to mark the reference. Then, in the slow path, check the
  // gray bit in the lock word of the reference's holder (`obj`) to
  // decide whether to mark `ref` or not.
  //
  // We use shared thunks for the slow path; shared within the method
  // for JIT, across methods for AOT. That thunk checks the holder
  // and jumps to the entrypoint if needed. If the holder is not gray,
  // it creates a fake dependency and returns to the LDR instruction.
  //
  // lr = &gray_return_address;
  // if (mr) {  // Thread::Current()->GetIsGcMarking()
  //   goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
  // }
  // not_gray_return_address:
  //   // Original reference load. If the offset is too large to fit
  //   // into LDR, we use an adjusted base register here.
  //   HeapReference<mirror::Object> reference = *(obj+offset);
  // gray_return_address:

  DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
  DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));

  UseScratchRegisterScope temps(GetVIXLAssembler());
  DCHECK(temps.IsAvailable(ip0));
  DCHECK(temps.IsAvailable(ip1));
  temps.Exclude(ip0, ip1);
  uint32_t custom_data = use_load_acquire
      ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
      : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());

  {
    ExactAssemblyScope guard(GetVIXLAssembler(),
                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
    vixl::aarch64::Label return_address;
    __ adr(lr, &return_address);
    EmitBakerReadBarrierCbnz(custom_data);
    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                  "Field LDR must be 1 instruction (4B) before the return address label; "
                  " 2 instructions (8B) for heap poisoning.");
    Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
    if (use_load_acquire) {
      DCHECK_EQ(src.GetOffset(), 0);
      __ ldar(ref_reg, src);
    } else {
      __ ldr(ref_reg, src);
    }
    if (needs_null_check) {
      MaybeRecordImplicitNullCheck(instruction);
    }
    // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
    // macro instructions disallowed in ExactAssemblyScope.
    if (kPoisonHeapReferences) {
      __ neg(ref_reg, Operand(ref_reg));
    }
    __ bind(&return_address);
  }
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
}

void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t offset,
                                                               Location maybe_temp,
                                                               bool needs_null_check,
                                                               bool use_load_acquire) {
  DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
  Register base = obj;
  if (use_load_acquire) {
    DCHECK(maybe_temp.IsRegister());
    base = WRegisterFrom(maybe_temp);
    __ Add(base, obj, offset);
    offset = 0u;
  } else if (offset >= kReferenceLoadMinFarOffset) {
    DCHECK(maybe_temp.IsRegister());
    base = WRegisterFrom(maybe_temp);
    static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
    __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
    offset &= (kReferenceLoadMinFarOffset - 1u);
  }
  MemOperand src(base.X(), offset);
  GenerateFieldLoadWithBakerReadBarrier(
      instruction, ref, obj, src, needs_null_check, use_load_acquire);
}
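
// A worked example of the far-offset split above (hypothetical values): with
// kReferenceLoadMinFarOffset == 16KiB and offset == 0x4008, the Add materializes
// base = obj + 0x4000 (offset & ~0x3fff) and the remaining offset becomes 0x8
// (offset & 0x3fff), so the reference is loaded as "ldr wRef, [xBase, #8]" and the
// immediate stays small enough for a single LDR that the Baker thunk can decode.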

void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t data_offset,
                                                               Location index,
                                                               bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);

  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
  // Marking Register) to decide whether we need to enter the slow
  // path to mark the reference. Then, in the slow path, check the
  // gray bit in the lock word of the reference's holder (`obj`) to
  // decide whether to mark `ref` or not.
  //
  // We use shared thunks for the slow path; shared within the method
  // for JIT, across methods for AOT. That thunk checks the holder
  // and jumps to the entrypoint if needed. If the holder is not gray,
  // it creates a fake dependency and returns to the LDR instruction.
  //
  // lr = &gray_return_address;
  // if (mr) {  // Thread::Current()->GetIsGcMarking()
  //   goto array_thunk<base_reg>(lr)
  // }
  // not_gray_return_address:
  //   // Original reference load. If the offset is too large to fit
  //   // into LDR, we use an adjusted base register here.
  //   HeapReference<mirror::Object> reference = data[index];
  // gray_return_address:

  DCHECK(index.IsValid());
  Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
  Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);

  UseScratchRegisterScope temps(GetVIXLAssembler());
  DCHECK(temps.IsAvailable(ip0));
  DCHECK(temps.IsAvailable(ip1));
  temps.Exclude(ip0, ip1);

  Register temp;
  if (instruction->GetArray()->IsIntermediateAddress()) {
    // We do not need to compute the intermediate address from the array: the
    // input instruction has done it already. See the comment in
    // `TryExtractArrayAccessAddress()`.
    if (kIsDebugBuild) {
      HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
      DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
    }
    temp = obj;
  } else {
    temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
    __ Add(temp.X(), obj.X(), Operand(data_offset));
  }

  uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());

  {
    ExactAssemblyScope guard(GetVIXLAssembler(),
                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
    vixl::aarch64::Label return_address;
    __ adr(lr, &return_address);
    EmitBakerReadBarrierCbnz(custom_data);
    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                  "Array LDR must be 1 instruction (4B) before the return address label; "
                  " 2 instructions (8B) for heap poisoning.");
    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
    // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
    // macro instructions disallowed in ExactAssemblyScope.
    if (kPoisonHeapReferences) {
      __ neg(ref_reg, Operand(ref_reg));
    }
    __ bind(&return_address);
  }
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
}

void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
  // The following condition is a compile-time one, so it does not have a run-time cost.
  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
    // The following condition is a run-time one; it is executed after the
    // previous compile-time test, to avoid penalizing non-debug builds.
    if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
      GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
    }
  }
}

void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                 Location out,
                                                 Location ref,
                                                 Location obj,
                                                 uint32_t offset,
                                                 Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location ref,
                                                      Location obj,
                                                      uint32_t offset,
                                                      Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
  }
}

void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                        Location out,
                                                        Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCodeARM64* slow_path =
      new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
        instruction->GetIndex(), kArm64PointerSize).SizeValue();
    __ Ldr(XRegisterFrom(locations->Out()),
           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
  } else {
    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
        instruction->GetIndex(), kArm64PointerSize));
    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
    __ Ldr(XRegisterFrom(locations->Out()),
           MemOperand(XRegisterFrom(locations->Out()), method_offset));
  }
}

static void PatchJitRootUse(uint8_t* code,
                            const uint8_t* roots_data,
                            vixl::aarch64::Literal<uint32_t>* literal,
                            uint64_t index_in_table) {
  uint32_t literal_offset = literal->GetOffset();
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  uint8_t* data = code + literal_offset;
  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
}
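
// For illustration (hypothetical numbers): if `roots_data` were at 0x70001000 and
// `index_in_table` were 3, the 32-bit literal at `code + literal_offset` would be patched
// to 0x70001000 + 3 * sizeof(GcRoot<mirror::Object>), i.e. the address of the fourth slot
// of the JIT root table.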

void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const auto& entry : jit_string_patches_) {
    const StringReference& string_reference = entry.first;
    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
  for (const auto& entry : jit_class_patches_) {
    const TypeReference& type_reference = entry.first;
    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
}

#undef __
#undef QUICK_ENTRY_POINT

#define __ assembler.GetVIXLAssembler()->

static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
                                     vixl::aarch64::Register base_reg,
                                     vixl::aarch64::MemOperand& lock_word,
                                     vixl::aarch64::Label* slow_path,
                                     vixl::aarch64::Label* throw_npe = nullptr) {
  // Load the lock word containing the rb_state.
  __ Ldr(ip0.W(), lock_word);
  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
  static_assert(
      BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
      "Field and array LDR offsets must be the same to reuse the same code.");
  // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
  if (throw_npe != nullptr) {
    __ Bind(throw_npe);
  }
  // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
  static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                "Field LDR must be 1 instruction (4B) before the return address label; "
                " 2 instructions (8B) for heap poisoning.");
  __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
  // Introduce a dependency on the lock_word including rb_state,
  // to prevent load-load reordering, and without using
  // a memory barrier (which would be more expensive).
  __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
  __ Br(lr);  // And return back to the function.
  // Note: The fake dependency is unnecessary for the slow path.
}
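
// Note on the fake dependency created just above: the 32-bit load of the lock word
// zero-extends into the X register, so `ip0 LSR #32` is always zero and the Add leaves
// `base_reg` unchanged; it merely makes the re-executed reference load address-dependent
// on the lock word load, which is enough to order the two loads without a memory barrier.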

// Load the read barrier introspection entrypoint in register `entrypoint`.
static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
                                                       vixl::aarch64::Register entrypoint) {
  // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
  DCHECK_EQ(ip0.GetCode(), 16u);
  const int32_t entry_point_offset =
      Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
  __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
}

void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
                                                      uint32_t encoded_data,
                                                      /*out*/ std::string* debug_name) {
  BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
  switch (kind) {
    case BakerReadBarrierKind::kField:
    case BakerReadBarrierKind::kAcquire: {
      auto base_reg =
          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
      CheckValidReg(base_reg.GetCode());
      auto holder_reg =
          Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
      CheckValidReg(holder_reg.GetCode());
      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
      temps.Exclude(ip0, ip1);
      // If base_reg differs from holder_reg, the offset was too large and we must have emitted
      // an explicit null check before the load. Otherwise, for implicit null checks, we need to
      // null-check the holder as we do not necessarily do that check before going to the thunk.
      vixl::aarch64::Label throw_npe_label;
      vixl::aarch64::Label* throw_npe = nullptr;
      if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
        throw_npe = &throw_npe_label;
        __ Cbz(holder_reg.W(), throw_npe);
      }
      // Check if the holder is gray and, if not, add fake dependency to the base register
      // and return to the LDR instruction to load the reference. Otherwise, use introspection
      // to load the reference and call the entrypoint that performs further checks on the
      // reference and marks it if needed.
      vixl::aarch64::Label slow_path;
      MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
      __ Bind(&slow_path);
      if (kind == BakerReadBarrierKind::kField) {
        MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
        __ Ldr(ip0.W(), ldr_address);                       // Load the LDR (immediate) unsigned offset.
        LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
        __ Ubfx(ip0.W(), ip0.W(), 10, 12);                  // Extract the offset.
        __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));  // Load the reference.
      } else {
        DCHECK(kind == BakerReadBarrierKind::kAcquire);
        DCHECK(!base_reg.Is(holder_reg));
        LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
        __ Ldar(ip0.W(), MemOperand(base_reg));
      }
      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
      __ Br(ip1);  // Jump to the entrypoint.
      break;
    }
    case BakerReadBarrierKind::kArray: {
      auto base_reg =
          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
      CheckValidReg(base_reg.GetCode());
      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                BakerReadBarrierSecondRegField::Decode(encoded_data));
      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
      temps.Exclude(ip0, ip1);
      vixl::aarch64::Label slow_path;
      int32_t data_offset =
          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
      DCHECK_LT(lock_word.GetOffset(), 0);
      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
      __ Bind(&slow_path);
      MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
      __ Ldr(ip0.W(), ldr_address);  // Load the LDR (register) unsigned offset.
      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
      __ Ubfx(ip0, ip0, 16, 6);      // Extract the index register, plus 32 (bit 21 is set).
      __ Bfi(ip1, ip0, 3, 6);        // Insert ip0 into the entrypoint address to create
                                     // a switch case target based on the index register.
      __ Mov(ip0, base_reg);         // Move the base register to ip0.
      __ Br(ip1);                    // Jump to the entrypoint's array switch case.
      break;
    }
    case BakerReadBarrierKind::kGcRoot: {
      // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
      // and it does not have a forwarding address), call the correct introspection entrypoint;
      // otherwise return the reference (or the extracted forwarding address).
      // There is no gray bit check for GC roots.
      auto root_reg =
          Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
      CheckValidReg(root_reg.GetCode());
      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                BakerReadBarrierSecondRegField::Decode(encoded_data));
      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
      temps.Exclude(ip0, ip1);
      vixl::aarch64::Label return_label, not_marked, forwarding_address;
      __ Cbz(root_reg, &return_label);
      MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
      __ Ldr(ip0.W(), lock_word);
      __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
      __ Bind(&return_label);
      __ Br(lr);
      __ Bind(&not_marked);
      __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
      __ B(&forwarding_address, mi);
      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
      // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
      // art_quick_read_barrier_mark_introspection_gc_roots.
      __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
      __ Mov(ip0.W(), root_reg);
      __ Br(ip1);
      __ Bind(&forwarding_address);
      __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
      __ Br(lr);
      break;
    }
    default:
      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
      UNREACHABLE();
  }
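
  // A hypothetical example for the kField thunk above: if the instrumented load was
  // "ldr w3, [x5, #8]", the thunk reloads that instruction word from lr plus the LDR offset,
  // extracts the imm12 field from bits [21:10] with UBFX (here 2, since the unsigned-offset
  // LDR form scales its immediate by the 4-byte access size), and re-issues the load as
  // "ldr w16, [x5, x16, lsl #2]" before branching to the marking entrypoint with the
  // (possibly poisoned) reference in ip0.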

  // For JIT, the slow path is considered part of the compiled method,
  // so JIT should pass null as `debug_name`. Tests may not have a runtime.
  DCHECK(Runtime::Current() == nullptr ||
         !Runtime::Current()->UseJitCompilation() ||
         debug_name == nullptr);
  if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
    std::ostringstream oss;
    oss << "BakerReadBarrierThunk";
    switch (kind) {
      case BakerReadBarrierKind::kField:
        oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
        break;
      case BakerReadBarrierKind::kAcquire:
        oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
        break;
      case BakerReadBarrierKind::kArray:
        oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                  BakerReadBarrierSecondRegField::Decode(encoded_data));
        break;
      case BakerReadBarrierKind::kGcRoot:
        oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                  BakerReadBarrierSecondRegField::Decode(encoded_data));
        break;
    }
    *debug_name = oss.str();
  }
}

#undef __

}  // namespace arm64
}  // namespace art