/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions, while a
// jump table version generates 7 instructions and num_entries literals. The compare/jump
// sequence therefore generates less code/data for a small num_entries (at the threshold of
// 5 entries it costs about 7.5 instructions versus 7 instructions plus 5 literals).
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

static constexpr int kC2ConditionMask = 0x400;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
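      // Spilling them here is what makes that possible: the throw entrypoint
      // does not return, and the catch handler recovers the values from the
      // spill slots described by this call site's stack map.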
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    __ Bind(GetEntryLabel());
    if (type_ == Primitive::kPrimInt) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }

    } else {
      DCHECK_EQ(Primitive::kPrimLong, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const Primitive::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
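    // If `successor_` is null, the fast path resumes right after the suspend
    // check via the return label below; otherwise the check was hoisted from a
    // goto and we can jump straight to the successor block.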
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // Are we using an array length from memory?
    HInstruction* array_length = instruction_->InputAt(1);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
      // Load the array length into our temporary.
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
      // Check for conflicts with index.
      if (length_loc.Equals(locations->InAt(0))) {
        // We know we aren't using parameter 2.
        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
      }
      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
      if (mirror::kUseStringCompression) {
        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
      }
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver (if, say, the index already sits in the register assigned to the
    // length argument, sequential copies would clobber it before it is read).
    codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimInt,
        length_loc,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls,
                          HInstruction* at,
                          uint32_t dex_pc,
                          bool do_clinit)
      : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, locations);

    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
    x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
                                  instruction_,
                                  dex_pc_,
                                  this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    Location out = locations->Out();
    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
      DCHECK(out.IsValid());
      __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
              locations->Out().AsRegister<CpuRegister>());
      Label* fixup_label = x86_64_codegen->NewTypeBssEntryPatch(cls_);
      __ Bind(fixup_label);
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The dex PC of `at_`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);

    // Store the resolved String to the BSS entry.
    __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
            locations->Out().AsRegister<CpuRegister>());
    Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
    __ Bind(fixup_label);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    uint32_t dex_pc = instruction_->GetDexPc();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                               Primitive::kPrimNot,
                               locations->InAt(1),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                               Primitive::kPrimNot);
    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_64_codegen->Load32BitValue(
        CpuRegister(calling_convention.GetRegisterAt(0)),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        Primitive::kPrimNot,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
                                Location ref,
                                bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // in RDI and output in RAX):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
                                              Location ref,
                                              CpuRegister obj,
                                              const Address& field_addr,
                                              bool unpoison_ref_before_marking,
                                              CpuRegister temp1,
                                              CpuRegister temp2)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp1_(temp1),
        temp2_(temp2) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    // This slow path is only used by the UnsafeCASObject intrinsic.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp1_, ref_cpu_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // in RDI and output in RAX):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp1_, ref_cpu_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the
    // mutator updates before us, but it's OK. This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHG
    // instruction, which requires the expected value (the old
    // reference) to be in EAX. Save RAX beforehand, and move the
    // expected value (stored in `temp1_`) into EAX.
    __ movq(temp2_, CpuRegister(RAX));
    __ movl(CpuRegister(RAX), temp1_);

    // Convenience aliases.
    CpuRegister base = obj_;
    CpuRegister expected = CpuRegister(RAX);
    CpuRegister value = ref_cpu_reg;

    bool base_equals_value = (base.AsRegister() == value.AsRegister());
    Register value_reg = ref_reg;
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value_reg` to a temporary register. This way, poisoning
        // `value_reg` won't invalidate `base`.
        value_reg = temp1_.AsRegister();
        __ movl(CpuRegister(value_reg), base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (RAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // No need to unpoison `expected` (RAX), as it will be overwritten below.
    }

    // Restore RAX.
    __ movq(CpuRegister(RAX), temp2_);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const CpuRegister obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const CpuRegister temp1_;
  const CpuRegister temp2_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the real offset (data offset + (index << 2), as heap
        // references are 4 bytes wide) and store it in `index`.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          Primitive::kPrimNot,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          Primitive::kPrimNot,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            Primitive::kPrimInt,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
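// Unlike the heap-reference slow path above, the runtime receives the address
// of the root itself (a GcRoot<mirror::Object>*) rather than an object plus
// offset, since a GC root is not located inside a heap object.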
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name. The ucomiss/ucomisd instructions set the
// flags as an unsigned comparison would, hence the unsigned condition codes.
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default:      break;  // should not happen
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().IsBootImage());
      __ leal(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
      RecordBootMethodPatch(invoke);
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
      Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
      // Bind a new fixup label at the end of the "movq" insn.
      __ Bind(NewMethodBssEntryPatch(
          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      __ call(&frame_entry_label_);
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64PointerSize).SizeValue()));
      break;
  }
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (though the
  // concurrent copying collector may not do so in the future).
  __ MaybeUnpoisonHeapReference(temp);
  // temp = temp->GetMethodAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kX86_64PointerSize).SizeValue()));
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}

void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
  boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
                                          invoke->GetTargetMethod().dex_method_index);
  __ Bind(&boot_image_method_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewMethodBssEntryPatch(MethodReference target_method) {
  // Add a patch entry and return the label.
  method_bss_entry_patches_.emplace_back(*target_method.dex_file, target_method.dex_method_index);
  return &method_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) {
  boot_image_type_patches_.emplace_back(load_class->GetDexFile(),
                                        load_class->GetTypeIndex().index_);
  __ Bind(&boot_image_type_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
  type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  return &type_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
  DCHECK(GetCompilerOptions().IsBootImage());
  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
  __ Bind(&string_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
  DCHECK(!GetCompilerOptions().IsBootImage());
  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
  return &string_patches_.back().label;
}

// The label points to the end of the "movl" (or another) instruction, but the literal
// offset for the method patch needs to point to the embedded constant, which occupies
// the last 4 bytes. For example, a RIP-relative `movl` ends with its 4-byte displacement,
// so the patchable word starts 4 bytes before the bound label.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
    const ArenaDeque<PatchInfo<Label>>& infos,
    ArenaVector<LinkerPatch>* linker_patches) {
  for (const PatchInfo<Label>& info : infos) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(
        Factory(literal_offset, &info.dex_file, info.label.Position(), info.index));
  }
}

void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      boot_image_method_patches_.size() +
      method_bss_entry_patches_.size() +
      boot_image_type_patches_.size() +
      type_bss_entry_patches_.size() +
      string_patches_.size();
  linker_patches->reserve(size);
  if (GetCompilerOptions().IsBootImage()) {
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
                                                                  linker_patches);
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
                                                                linker_patches);
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
  } else {
    DCHECK(boot_image_method_patches_.empty());
    DCHECK(boot_image_type_patches_.empty());
    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
  }
  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
                                                                linker_patches);
  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                              linker_patches);
  DCHECK_EQ(size, linker_patches->size());
}

void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << FloatRegister(reg);
}

size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
  } else {
    __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
  }
  return GetFloatingPointSpillSlotSize();
}

size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  } else {
    __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  }
  return GetFloatingPointSpillSlotSize();
}

void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
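  // Quick entrypoints live in a function table on the Thread object, reached
  // through the GS segment register (see GenerateInvokeRuntime below).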
  GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, dex_pc, slow_path);
  }
}

void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                              HInstruction* instruction,
                                                              SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}

void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
}

static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
                                         const X86_64InstructionSetFeatures& isa_features,
                                         const CompilerOptions& compiler_options,
                                         OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfFloatRegisters,
                    kNumberOfCpuRegisterPairs,
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
                                        arraysize(kFpuCalleeSaves)),
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetArena(), this),
      assembler_(graph->GetArena()),
      isa_features_(isa_features),
      constant_area_start_(0),
      boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                               CodeGeneratorX86_64* codegen)
    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}

void CodeGeneratorX86_64::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[RSP] = true;

  // Block the register used as TMP.
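  // TMP is the code generator's own scratch register (used, for example, by
  // the stack-to-stack moves in Move()), so the register allocator must never
  // hand it out.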
  blocked_core_registers_[TMP] = true;
}

static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
}

static dwarf::Reg DWARFReg(FloatRegister reg) {
  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
}

void CodeGeneratorX86_64::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
  __ Bind(&frame_entry_label_);
  bool skip_overflow_check = IsLeafMethod()
      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

  if (!skip_overflow_check) {
    __ testq(CpuRegister(RAX), Address(
        CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
    RecordPcInfo(nullptr, 0);
  }

  if (HasEmptyFrame()) {
    return;
  }

  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
    Register reg = kCoreCalleeSaves[i];
    if (allocated_registers_.ContainsCoreRegister(reg)) {
      __ pushq(CpuRegister(reg));
      __ cfi().AdjustCFAOffset(kX86_64WordSize);
      __ cfi().RelOffset(DWARFReg(reg), 0);
    }
  }

  int adjust = GetFrameSize() - GetCoreSpillSize();
  __ subq(CpuRegister(RSP), Immediate(adjust));
  __ cfi().AdjustCFAOffset(adjust);
  uint32_t xmm_spill_location = GetFpuSpillStart();
  size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();

  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
      __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
      __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
    }
  }

  // Save the current method if we need it. Note that we do not
  // do this in HCurrentMethod, as the instruction might have been removed
  // in the SSA graph.
  if (RequiresCurrentMethod()) {
    __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
            CpuRegister(kMethodRegisterArgument));
  }

  if (GetGraph()->HasShouldDeoptimizeFlag()) {
    // Initialize should_deoptimize flag to 0.
    __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
  }
}

void CodeGeneratorX86_64::GenerateFrameExit() {
  __ cfi().RememberState();
  if (!HasEmptyFrame()) {
    uint32_t xmm_spill_location = GetFpuSpillStart();
    size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
      if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
        int offset = xmm_spill_location + (xmm_spill_slot_size * i);
        __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
        __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
      }
    }

    int adjust = GetFrameSize() - GetCoreSpillSize();
    __ addq(CpuRegister(RSP), Immediate(adjust));
    __ cfi().AdjustCFAOffset(-adjust);

    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
      Register reg = kCoreCalleeSaves[i];
      if (allocated_registers_.ContainsCoreRegister(reg)) {
        __ popq(CpuRegister(reg));
        __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
        __ cfi().Restore(DWARFReg(reg));
      }
    }
  }
  __ ret();
  __ cfi().RestoreState();
  __ cfi().DefCFAOffset(GetFrameSize());
}

void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
  __ Bind(GetLabelOf(block));
}

void CodeGeneratorX86_64::Move(Location destination, Location source) {
  if (source.Equals(destination)) {
    return;
  }
  if (destination.IsRegister()) {
    CpuRegister dest = destination.AsRegister<CpuRegister>();
    if (source.IsRegister()) {
      __ movq(dest, source.AsRegister<CpuRegister>());
    } else if (source.IsFpuRegister()) {
      __ movd(dest, source.AsFpuRegister<XmmRegister>());
    } else if (source.IsStackSlot()) {
      __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
    } else if (source.IsConstant()) {
      HConstant* constant = source.GetConstant();
      if (constant->IsLongConstant()) {
        Load64BitValue(dest, constant->AsLongConstant()->GetValue());
      } else {
        Load32BitValue(dest, GetInt32ValueOf(constant));
      }
    } else {
      DCHECK(source.IsDoubleStackSlot());
      __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
    }
  } else if (destination.IsFpuRegister()) {
    XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
    if (source.IsRegister()) {
      __ movd(dest, source.AsRegister<CpuRegister>());
    } else if (source.IsFpuRegister()) {
      __ movaps(dest, source.AsFpuRegister<XmmRegister>());
    } else if (source.IsConstant()) {
      HConstant* constant = source.GetConstant();
      int64_t value = CodeGenerator::GetInt64ValueOf(constant);
      if (constant->IsFloatConstant()) {
        Load32BitValue(dest, static_cast<int32_t>(value));
      } else {
        Load64BitValue(dest, value);
      }
    } else if (source.IsStackSlot()) {
      __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
    } else {
      DCHECK(source.IsDoubleStackSlot());
      __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
    }
  } else if (destination.IsStackSlot()) {
    if (source.IsRegister()) {
      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
              source.AsRegister<CpuRegister>());
    } else if (source.IsFpuRegister()) {
      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
               source.AsFpuRegister<XmmRegister>());
    } else if (source.IsConstant()) {
1401 HConstant* constant = source.GetConstant(); 1402 int32_t value = GetInt32ValueOf(constant); 1403 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); 1404 } else { 1405 DCHECK(source.IsStackSlot()) << source; 1406 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 1407 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 1408 } 1409 } else { 1410 DCHECK(destination.IsDoubleStackSlot()); 1411 if (source.IsRegister()) { 1412 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), 1413 source.AsRegister<CpuRegister>()); 1414 } else if (source.IsFpuRegister()) { 1415 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), 1416 source.AsFpuRegister<XmmRegister>()); 1417 } else if (source.IsConstant()) { 1418 HConstant* constant = source.GetConstant(); 1419 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant()); 1420 int64_t value = GetInt64ValueOf(constant); 1421 Store64BitValueToStack(destination, value); 1422 } else { 1423 DCHECK(source.IsDoubleStackSlot()); 1424 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 1425 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 1426 } 1427 } 1428 } 1429 1430 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { 1431 DCHECK(location.IsRegister()); 1432 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value)); 1433 } 1434 1435 void CodeGeneratorX86_64::MoveLocation( 1436 Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) { 1437 Move(dst, src); 1438 } 1439 1440 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) { 1441 if (location.IsRegister()) { 1442 locations->AddTemp(location); 1443 } else { 1444 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; 1445 } 1446 } 1447 1448 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) { 1449 DCHECK(!successor->IsExitBlock()); 1450 1451 HBasicBlock* block = got->GetBlock(); 1452 HInstruction* previous = got->GetPrevious(); 1453 1454 HLoopInformation* info = block->GetLoopInformation(); 1455 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 1456 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 1457 return; 1458 } 1459 1460 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 1461 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 1462 } 1463 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { 1464 __ jmp(codegen_->GetLabelOf(successor)); 1465 } 1466 } 1467 1468 void LocationsBuilderX86_64::VisitGoto(HGoto* got) { 1469 got->SetLocations(nullptr); 1470 } 1471 1472 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { 1473 HandleGoto(got, got->GetSuccessor()); 1474 } 1475 1476 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) { 1477 try_boundary->SetLocations(nullptr); 1478 } 1479 1480 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) { 1481 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); 1482 if (!successor->IsExitBlock()) { 1483 HandleGoto(try_boundary, successor); 1484 } 1485 } 1486 1487 void LocationsBuilderX86_64::VisitExit(HExit* exit) { 1488 exit->SetLocations(nullptr); 1489 } 1490 1491 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 1492 } 1493 1494 template<class 
LabelType> 1495 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, 1496 LabelType* true_label, 1497 LabelType* false_label) { 1498 if (cond->IsFPConditionTrueIfNaN()) { 1499 __ j(kUnordered, true_label); 1500 } else if (cond->IsFPConditionFalseIfNaN()) { 1501 __ j(kUnordered, false_label); 1502 } 1503 __ j(X86_64FPCondition(cond->GetCondition()), true_label); 1504 } 1505 1506 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) { 1507 LocationSummary* locations = condition->GetLocations(); 1508 1509 Location left = locations->InAt(0); 1510 Location right = locations->InAt(1); 1511 Primitive::Type type = condition->InputAt(0)->GetType(); 1512 switch (type) { 1513 case Primitive::kPrimBoolean: 1514 case Primitive::kPrimByte: 1515 case Primitive::kPrimChar: 1516 case Primitive::kPrimShort: 1517 case Primitive::kPrimInt: 1518 case Primitive::kPrimNot: { 1519 codegen_->GenerateIntCompare(left, right); 1520 break; 1521 } 1522 case Primitive::kPrimLong: { 1523 codegen_->GenerateLongCompare(left, right); 1524 break; 1525 } 1526 case Primitive::kPrimFloat: { 1527 if (right.IsFpuRegister()) { 1528 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); 1529 } else if (right.IsConstant()) { 1530 __ ucomiss(left.AsFpuRegister<XmmRegister>(), 1531 codegen_->LiteralFloatAddress( 1532 right.GetConstant()->AsFloatConstant()->GetValue())); 1533 } else { 1534 DCHECK(right.IsStackSlot()); 1535 __ ucomiss(left.AsFpuRegister<XmmRegister>(), 1536 Address(CpuRegister(RSP), right.GetStackIndex())); 1537 } 1538 break; 1539 } 1540 case Primitive::kPrimDouble: { 1541 if (right.IsFpuRegister()) { 1542 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); 1543 } else if (right.IsConstant()) { 1544 __ ucomisd(left.AsFpuRegister<XmmRegister>(), 1545 codegen_->LiteralDoubleAddress( 1546 right.GetConstant()->AsDoubleConstant()->GetValue())); 1547 } else { 1548 DCHECK(right.IsDoubleStackSlot()); 1549 __ ucomisd(left.AsFpuRegister<XmmRegister>(), 1550 Address(CpuRegister(RSP), right.GetStackIndex())); 1551 } 1552 break; 1553 } 1554 default: 1555 LOG(FATAL) << "Unexpected condition type " << type; 1556 } 1557 } 1558 1559 template<class LabelType> 1560 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, 1561 LabelType* true_target_in, 1562 LabelType* false_target_in) { 1563 // Generated branching requires both targets to be explicit. If either of the 1564 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. 1565 LabelType fallthrough_target; 1566 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; 1567 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; 1568 1569 // Generate the comparison to set the CC. 1570 GenerateCompareTest(condition); 1571 1572 // Now generate the correct jump(s). 
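  // e.g. a gt-bias float `a < b` emits roughly:
  //   ucomiss xmm_a, <b>
  //   jp   false_target   // unordered: NaN makes `<` false under gt bias.
  //   jb   true_target
  //   jmp  false_target   // unless false_target is the fallthrough.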
1573 Primitive::Type type = condition->InputAt(0)->GetType(); 1574 switch (type) { 1575 case Primitive::kPrimLong: { 1576 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); 1577 break; 1578 } 1579 case Primitive::kPrimFloat: { 1580 GenerateFPJumps(condition, true_target, false_target); 1581 break; 1582 } 1583 case Primitive::kPrimDouble: { 1584 GenerateFPJumps(condition, true_target, false_target); 1585 break; 1586 } 1587 default: 1588 LOG(FATAL) << "Unexpected condition type " << type; 1589 } 1590 1591 if (false_target != &fallthrough_target) { 1592 __ jmp(false_target); 1593 } 1594 1595 if (fallthrough_target.IsLinked()) { 1596 __ Bind(&fallthrough_target); 1597 } 1598 } 1599 1600 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { 1601 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS 1602 // are set only strictly before `branch`. We can't use the eflags on long 1603 // conditions if they are materialized due to the complex branching. 1604 return cond->IsCondition() && 1605 cond->GetNext() == branch && 1606 !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); 1607 } 1608 1609 template<class LabelType> 1610 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, 1611 size_t condition_input_index, 1612 LabelType* true_target, 1613 LabelType* false_target) { 1614 HInstruction* cond = instruction->InputAt(condition_input_index); 1615 1616 if (true_target == nullptr && false_target == nullptr) { 1617 // Nothing to do. The code always falls through. 1618 return; 1619 } else if (cond->IsIntConstant()) { 1620 // Constant condition, statically compared against "true" (integer value 1). 1621 if (cond->AsIntConstant()->IsTrue()) { 1622 if (true_target != nullptr) { 1623 __ jmp(true_target); 1624 } 1625 } else { 1626 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 1627 if (false_target != nullptr) { 1628 __ jmp(false_target); 1629 } 1630 } 1631 return; 1632 } 1633 1634 // The following code generates these patterns: 1635 // (1) true_target == nullptr && false_target != nullptr 1636 // - opposite condition true => branch to false_target 1637 // (2) true_target != nullptr && false_target == nullptr 1638 // - condition true => branch to true_target 1639 // (3) true_target != nullptr && false_target != nullptr 1640 // - condition true => branch to true_target 1641 // - branch to false_target 1642 if (IsBooleanValueOrMaterializedCondition(cond)) { 1643 if (AreEflagsSetFrom(cond, instruction)) { 1644 if (true_target == nullptr) { 1645 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target); 1646 } else { 1647 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); 1648 } 1649 } else { 1650 // Materialized condition, compare against 0. 1651 Location lhs = instruction->GetLocations()->InAt(condition_input_index); 1652 if (lhs.IsRegister()) { 1653 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); 1654 } else { 1655 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); 1656 } 1657 if (true_target == nullptr) { 1658 __ j(kEqual, false_target); 1659 } else { 1660 __ j(kNotEqual, true_target); 1661 } 1662 } 1663 } else { 1664 // Condition has not been materialized, use its inputs as the 1665 // comparison and its condition as the branch condition. 
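    // e.g. `if (a < b)` with the HLessThan emitted at its use site becomes a
    // bare cmpl/jl pair instead of setcc + testl on a materialized boolean.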
1666 HCondition* condition = cond->AsCondition(); 1667 1668 // If this is a long or FP comparison that has been folded into 1669 // the HCondition, generate the comparison directly. 1670 Primitive::Type type = condition->InputAt(0)->GetType(); 1671 if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { 1672 GenerateCompareTestAndBranch(condition, true_target, false_target); 1673 return; 1674 } 1675 1676 Location lhs = condition->GetLocations()->InAt(0); 1677 Location rhs = condition->GetLocations()->InAt(1); 1678 codegen_->GenerateIntCompare(lhs, rhs); 1679 if (true_target == nullptr) { 1680 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); 1681 } else { 1682 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); 1683 } 1684 } 1685 1686 // If neither branch falls through (case 3), the conditional branch to `true_target` 1687 // was already emitted (case 2) and we need to emit a jump to `false_target`. 1688 if (true_target != nullptr && false_target != nullptr) { 1689 __ jmp(false_target); 1690 } 1691 } 1692 1693 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { 1694 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); 1695 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 1696 locations->SetInAt(0, Location::Any()); 1697 } 1698 } 1699 1700 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { 1701 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 1702 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 1703 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? 1704 nullptr : codegen_->GetLabelOf(true_successor); 1705 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
1706 nullptr : codegen_->GetLabelOf(false_successor); 1707 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); 1708 } 1709 1710 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { 1711 LocationSummary* locations = new (GetGraph()->GetArena()) 1712 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 1713 InvokeRuntimeCallingConvention calling_convention; 1714 RegisterSet caller_saves = RegisterSet::Empty(); 1715 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1716 locations->SetCustomSlowPathCallerSaves(caller_saves); 1717 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 1718 locations->SetInAt(0, Location::Any()); 1719 } 1720 } 1721 1722 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { 1723 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); 1724 GenerateTestAndBranch<Label>(deoptimize, 1725 /* condition_input_index */ 0, 1726 slow_path->GetEntryLabel(), 1727 /* false_target */ nullptr); 1728 } 1729 1730 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1731 LocationSummary* locations = new (GetGraph()->GetArena()) 1732 LocationSummary(flag, LocationSummary::kNoCall); 1733 locations->SetOut(Location::RequiresRegister()); 1734 } 1735 1736 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1737 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(), 1738 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 1739 } 1740 1741 static bool SelectCanUseCMOV(HSelect* select) { 1742 // There are no conditional move instructions for XMMs. 1743 if (Primitive::IsFloatingPointType(select->GetType())) { 1744 return false; 1745 } 1746 1747 // A FP condition doesn't generate the single CC that we need. 1748 HInstruction* condition = select->GetCondition(); 1749 if (condition->IsCondition() && 1750 Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) { 1751 return false; 1752 } 1753 1754 // We can generate a CMOV for this Select. 1755 return true; 1756 } 1757 1758 void LocationsBuilderX86_64::VisitSelect(HSelect* select) { 1759 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); 1760 if (Primitive::IsFloatingPointType(select->GetType())) { 1761 locations->SetInAt(0, Location::RequiresFpuRegister()); 1762 locations->SetInAt(1, Location::Any()); 1763 } else { 1764 locations->SetInAt(0, Location::RequiresRegister()); 1765 if (SelectCanUseCMOV(select)) { 1766 if (select->InputAt(1)->IsConstant()) { 1767 locations->SetInAt(1, Location::RequiresRegister()); 1768 } else { 1769 locations->SetInAt(1, Location::Any()); 1770 } 1771 } else { 1772 locations->SetInAt(1, Location::Any()); 1773 } 1774 } 1775 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 1776 locations->SetInAt(2, Location::RequiresRegister()); 1777 } 1778 locations->SetOut(Location::SameAsFirstInput()); 1779 } 1780 1781 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { 1782 LocationSummary* locations = select->GetLocations(); 1783 if (SelectCanUseCMOV(select)) { 1784 // If both the condition and the source types are integer, we can generate 1785 // a CMOV to implement Select. 
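    // The pattern is roughly:
    //   testl   cond_reg, cond_reg   // unless usable flags are still live.
    //   cmovnel out, true_value      // out already holds the false value.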
1786 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>(); 1787 Location value_true_loc = locations->InAt(1); 1788 DCHECK(locations->InAt(0).Equals(locations->Out())); 1789 1790 HInstruction* select_condition = select->GetCondition(); 1791 Condition cond = kNotEqual; 1792 1793 // Figure out how to test the 'condition'. 1794 if (select_condition->IsCondition()) { 1795 HCondition* condition = select_condition->AsCondition(); 1796 if (!condition->IsEmittedAtUseSite()) { 1797 // This was a previously materialized condition. 1798 // Can we use the existing condition code? 1799 if (AreEflagsSetFrom(condition, select)) { 1800 // Materialization was the previous instruction. Condition codes are right. 1801 cond = X86_64IntegerCondition(condition->GetCondition()); 1802 } else { 1803 // No, we have to recreate the condition code. 1804 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); 1805 __ testl(cond_reg, cond_reg); 1806 } 1807 } else { 1808 GenerateCompareTest(condition); 1809 cond = X86_64IntegerCondition(condition->GetCondition()); 1810 } 1811 } else { 1812 // Must be a Boolean condition, which needs to be compared to 0. 1813 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); 1814 __ testl(cond_reg, cond_reg); 1815 } 1816 1817 // If the condition is true, overwrite the output, which already contains false. 1818 // Generate the correct sized CMOV. 1819 bool is_64_bit = Primitive::Is64BitType(select->GetType()); 1820 if (value_true_loc.IsRegister()) { 1821 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit); 1822 } else { 1823 __ cmov(cond, 1824 value_false, 1825 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit); 1826 } 1827 } else { 1828 NearLabel false_target; 1829 GenerateTestAndBranch<NearLabel>(select, 1830 /* condition_input_index */ 2, 1831 /* true_target */ nullptr, 1832 &false_target); 1833 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); 1834 __ Bind(&false_target); 1835 } 1836 } 1837 1838 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 1839 new (GetGraph()->GetArena()) LocationSummary(info); 1840 } 1841 1842 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) { 1843 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 1844 } 1845 1846 void CodeGeneratorX86_64::GenerateNop() { 1847 __ nop(); 1848 } 1849 1850 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) { 1851 LocationSummary* locations = 1852 new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); 1853 // Handle the long/FP comparisons made in instruction simplification. 
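  // (The simplifier can fold an HCompare of long/FP operands into the HCondition
  // itself, so the inputs here are not necessarily int-sized.)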
1854 switch (cond->InputAt(0)->GetType()) { 1855 case Primitive::kPrimLong: 1856 locations->SetInAt(0, Location::RequiresRegister()); 1857 locations->SetInAt(1, Location::Any()); 1858 break; 1859 case Primitive::kPrimFloat: 1860 case Primitive::kPrimDouble: 1861 locations->SetInAt(0, Location::RequiresFpuRegister()); 1862 locations->SetInAt(1, Location::Any()); 1863 break; 1864 default: 1865 locations->SetInAt(0, Location::RequiresRegister()); 1866 locations->SetInAt(1, Location::Any()); 1867 break; 1868 } 1869 if (!cond->IsEmittedAtUseSite()) { 1870 locations->SetOut(Location::RequiresRegister()); 1871 } 1872 } 1873 1874 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { 1875 if (cond->IsEmittedAtUseSite()) { 1876 return; 1877 } 1878 1879 LocationSummary* locations = cond->GetLocations(); 1880 Location lhs = locations->InAt(0); 1881 Location rhs = locations->InAt(1); 1882 CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); 1883 NearLabel true_label, false_label; 1884 1885 switch (cond->InputAt(0)->GetType()) { 1886 default: 1887 // Integer case. 1888 1889 // Clear output register: setcc only sets the low byte. 1890 __ xorl(reg, reg); 1891 1892 codegen_->GenerateIntCompare(lhs, rhs); 1893 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); 1894 return; 1895 case Primitive::kPrimLong: 1896 // Clear output register: setcc only sets the low byte. 1897 __ xorl(reg, reg); 1898 1899 codegen_->GenerateLongCompare(lhs, rhs); 1900 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); 1901 return; 1902 case Primitive::kPrimFloat: { 1903 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); 1904 if (rhs.IsConstant()) { 1905 float value = rhs.GetConstant()->AsFloatConstant()->GetValue(); 1906 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value)); 1907 } else if (rhs.IsStackSlot()) { 1908 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 1909 } else { 1910 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>()); 1911 } 1912 GenerateFPJumps(cond, &true_label, &false_label); 1913 break; 1914 } 1915 case Primitive::kPrimDouble: { 1916 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); 1917 if (rhs.IsConstant()) { 1918 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue(); 1919 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value)); 1920 } else if (rhs.IsDoubleStackSlot()) { 1921 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 1922 } else { 1923 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>()); 1924 } 1925 GenerateFPJumps(cond, &true_label, &false_label); 1926 break; 1927 } 1928 } 1929 1930 // Convert the jumps into the result. 1931 NearLabel done_label; 1932 1933 // False case: result = 0. 1934 __ Bind(&false_label); 1935 __ xorl(reg, reg); 1936 __ jmp(&done_label); 1937 1938 // True case: result = 1. 
1939 __ Bind(&true_label); 1940 __ movl(reg, Immediate(1)); 1941 __ Bind(&done_label); 1942 } 1943 1944 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) { 1945 HandleCondition(comp); 1946 } 1947 1948 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) { 1949 HandleCondition(comp); 1950 } 1951 1952 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) { 1953 HandleCondition(comp); 1954 } 1955 1956 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) { 1957 HandleCondition(comp); 1958 } 1959 1960 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) { 1961 HandleCondition(comp); 1962 } 1963 1964 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) { 1965 HandleCondition(comp); 1966 } 1967 1968 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 1969 HandleCondition(comp); 1970 } 1971 1972 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 1973 HandleCondition(comp); 1974 } 1975 1976 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) { 1977 HandleCondition(comp); 1978 } 1979 1980 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) { 1981 HandleCondition(comp); 1982 } 1983 1984 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 1985 HandleCondition(comp); 1986 } 1987 1988 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 1989 HandleCondition(comp); 1990 } 1991 1992 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) { 1993 HandleCondition(comp); 1994 } 1995 1996 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) { 1997 HandleCondition(comp); 1998 } 1999 2000 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { 2001 HandleCondition(comp); 2002 } 2003 2004 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { 2005 HandleCondition(comp); 2006 } 2007 2008 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) { 2009 HandleCondition(comp); 2010 } 2011 2012 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) { 2013 HandleCondition(comp); 2014 } 2015 2016 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { 2017 HandleCondition(comp); 2018 } 2019 2020 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { 2021 HandleCondition(comp); 2022 } 2023 2024 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { 2025 LocationSummary* locations = 2026 new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); 2027 switch (compare->InputAt(0)->GetType()) { 2028 case Primitive::kPrimBoolean: 2029 case Primitive::kPrimByte: 2030 case Primitive::kPrimShort: 2031 case Primitive::kPrimChar: 2032 case Primitive::kPrimInt: 2033 case Primitive::kPrimLong: { 2034 locations->SetInAt(0, Location::RequiresRegister()); 2035 locations->SetInAt(1, Location::Any()); 2036 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2037 break; 2038 } 2039 case Primitive::kPrimFloat: 2040 case Primitive::kPrimDouble: { 2041 locations->SetInAt(0, Location::RequiresFpuRegister()); 2042 locations->SetInAt(1, Location::Any()); 2043 locations->SetOut(Location::RequiresRegister()); 2044 break; 2045 } 2046 default: 2047 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); 2048 } 2049 } 2050 2051 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { 2052 LocationSummary* locations = compare->GetLocations(); 2053 
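  // HCompare returns -1/0/1 with Java semantics; for FP inputs the gt/lt bias
  // decides the NaN result, e.g. cmpg-float treats a NaN operand as greater
  // and yields 1.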
CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2054 Location left = locations->InAt(0); 2055 Location right = locations->InAt(1); 2056 2057 NearLabel less, greater, done; 2058 Primitive::Type type = compare->InputAt(0)->GetType(); 2059 Condition less_cond = kLess; 2060 2061 switch (type) { 2062 case Primitive::kPrimBoolean: 2063 case Primitive::kPrimByte: 2064 case Primitive::kPrimShort: 2065 case Primitive::kPrimChar: 2066 case Primitive::kPrimInt: { 2067 codegen_->GenerateIntCompare(left, right); 2068 break; 2069 } 2070 case Primitive::kPrimLong: { 2071 codegen_->GenerateLongCompare(left, right); 2072 break; 2073 } 2074 case Primitive::kPrimFloat: { 2075 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); 2076 if (right.IsConstant()) { 2077 float value = right.GetConstant()->AsFloatConstant()->GetValue(); 2078 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value)); 2079 } else if (right.IsStackSlot()) { 2080 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); 2081 } else { 2082 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>()); 2083 } 2084 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 2085 less_cond = kBelow; // ucomis{s,d} sets CF 2086 break; 2087 } 2088 case Primitive::kPrimDouble: { 2089 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); 2090 if (right.IsConstant()) { 2091 double value = right.GetConstant()->AsDoubleConstant()->GetValue(); 2092 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value)); 2093 } else if (right.IsDoubleStackSlot()) { 2094 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); 2095 } else { 2096 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>()); 2097 } 2098 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 2099 less_cond = kBelow; // ucomis{s,d} sets CF 2100 break; 2101 } 2102 default: 2103 LOG(FATAL) << "Unexpected compare type " << type; 2104 } 2105 2106 __ movl(out, Immediate(0)); 2107 __ j(kEqual, &done); 2108 __ j(less_cond, &less); 2109 2110 __ Bind(&greater); 2111 __ movl(out, Immediate(1)); 2112 __ jmp(&done); 2113 2114 __ Bind(&less); 2115 __ movl(out, Immediate(-1)); 2116 2117 __ Bind(&done); 2118 } 2119 2120 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { 2121 LocationSummary* locations = 2122 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); 2123 locations->SetOut(Location::ConstantLocation(constant)); 2124 } 2125 2126 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 2127 // Will be generated at use site. 2128 } 2129 2130 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { 2131 LocationSummary* locations = 2132 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); 2133 locations->SetOut(Location::ConstantLocation(constant)); 2134 } 2135 2136 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 2137 // Will be generated at use site. 2138 } 2139 2140 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { 2141 LocationSummary* locations = 2142 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); 2143 locations->SetOut(Location::ConstantLocation(constant)); 2144 } 2145 2146 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 2147 // Will be generated at use site. 
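  // (Users read the value from the ConstantLocation and encode it themselves,
  // e.g. as an Immediate or a RIP-relative literal address.)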
2148 } 2149 2150 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { 2151 LocationSummary* locations = 2152 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); 2153 locations->SetOut(Location::ConstantLocation(constant)); 2154 } 2155 2156 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 2157 // Will be generated at use site. 2158 } 2159 2160 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { 2161 LocationSummary* locations = 2162 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); 2163 locations->SetOut(Location::ConstantLocation(constant)); 2164 } 2165 2166 void InstructionCodeGeneratorX86_64::VisitDoubleConstant( 2167 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 2168 // Will be generated at use site. 2169 } 2170 2171 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) { 2172 constructor_fence->SetLocations(nullptr); 2173 } 2174 2175 void InstructionCodeGeneratorX86_64::VisitConstructorFence( 2176 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 2177 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 2178 } 2179 2180 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2181 memory_barrier->SetLocations(nullptr); 2182 } 2183 2184 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2185 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 2186 } 2187 2188 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { 2189 ret->SetLocations(nullptr); 2190 } 2191 2192 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { 2193 codegen_->GenerateFrameExit(); 2194 } 2195 2196 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { 2197 LocationSummary* locations = 2198 new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall); 2199 switch (ret->InputAt(0)->GetType()) { 2200 case Primitive::kPrimBoolean: 2201 case Primitive::kPrimByte: 2202 case Primitive::kPrimChar: 2203 case Primitive::kPrimShort: 2204 case Primitive::kPrimInt: 2205 case Primitive::kPrimNot: 2206 case Primitive::kPrimLong: 2207 locations->SetInAt(0, Location::RegisterLocation(RAX)); 2208 break; 2209 2210 case Primitive::kPrimFloat: 2211 case Primitive::kPrimDouble: 2212 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); 2213 break; 2214 2215 default: 2216 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); 2217 } 2218 } 2219 2220 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { 2221 if (kIsDebugBuild) { 2222 switch (ret->InputAt(0)->GetType()) { 2223 case Primitive::kPrimBoolean: 2224 case Primitive::kPrimByte: 2225 case Primitive::kPrimChar: 2226 case Primitive::kPrimShort: 2227 case Primitive::kPrimInt: 2228 case Primitive::kPrimNot: 2229 case Primitive::kPrimLong: 2230 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX); 2231 break; 2232 2233 case Primitive::kPrimFloat: 2234 case Primitive::kPrimDouble: 2235 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), 2236 XMM0); 2237 break; 2238 2239 default: 2240 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); 2241 } 2242 } 2243 codegen_->GenerateFrameExit(); 2244 } 2245 2246 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const { 2247 switch (type) { 2248 
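    // Managed x86-64 ABI: integral and reference results are returned in RAX,
    // FP results in XMM0; void has no location.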
case Primitive::kPrimBoolean: 2249 case Primitive::kPrimByte: 2250 case Primitive::kPrimChar: 2251 case Primitive::kPrimShort: 2252 case Primitive::kPrimInt: 2253 case Primitive::kPrimNot: 2254 case Primitive::kPrimLong: 2255 return Location::RegisterLocation(RAX); 2256 2257 case Primitive::kPrimVoid: 2258 return Location::NoLocation(); 2259 2260 case Primitive::kPrimDouble: 2261 case Primitive::kPrimFloat: 2262 return Location::FpuRegisterLocation(XMM0); 2263 } 2264 2265 UNREACHABLE(); 2266 } 2267 2268 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const { 2269 return Location::RegisterLocation(kMethodRegisterArgument); 2270 } 2271 2272 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) { 2273 switch (type) { 2274 case Primitive::kPrimBoolean: 2275 case Primitive::kPrimByte: 2276 case Primitive::kPrimChar: 2277 case Primitive::kPrimShort: 2278 case Primitive::kPrimInt: 2279 case Primitive::kPrimNot: { 2280 uint32_t index = gp_index_++; 2281 stack_index_++; 2282 if (index < calling_convention.GetNumberOfRegisters()) { 2283 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 2284 } else { 2285 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 2286 } 2287 } 2288 2289 case Primitive::kPrimLong: { 2290 uint32_t index = gp_index_; 2291 stack_index_ += 2; 2292 if (index < calling_convention.GetNumberOfRegisters()) { 2293 gp_index_ += 1; 2294 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 2295 } else { 2296 gp_index_ += 2; 2297 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 2298 } 2299 } 2300 2301 case Primitive::kPrimFloat: { 2302 uint32_t index = float_index_++; 2303 stack_index_++; 2304 if (index < calling_convention.GetNumberOfFpuRegisters()) { 2305 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 2306 } else { 2307 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 2308 } 2309 } 2310 2311 case Primitive::kPrimDouble: { 2312 uint32_t index = float_index_++; 2313 stack_index_ += 2; 2314 if (index < calling_convention.GetNumberOfFpuRegisters()) { 2315 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 2316 } else { 2317 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 2318 } 2319 } 2320 2321 case Primitive::kPrimVoid: 2322 LOG(FATAL) << "Unexpected parameter type " << type; 2323 break; 2324 } 2325 return Location::NoLocation(); 2326 } 2327 2328 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2329 // The trampoline uses the same calling convention as dex calling conventions, 2330 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 2331 // the method_idx. 2332 HandleInvoke(invoke); 2333 } 2334 2335 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2336 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 2337 } 2338 2339 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2340 // Explicit clinit checks triggered by static invokes must have been pruned by 2341 // art::PrepareForRegisterAllocation. 
  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());

  IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
  if (intrinsic.TryDispatch(invoke)) {
    return;
  }

  HandleInvoke(invoke);
}

static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  if (invoke->GetLocations()->Intrinsified()) {
    IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
    intrinsic.Dispatch(invoke);
    return true;
  }
  return false;
}

void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
  // Explicit clinit checks triggered by static invokes must have been pruned by
  // art::PrepareForRegisterAllocation.
  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());

  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  codegen_->GenerateStaticOrDirectCall(
      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
}

void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
}

void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
  IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
  if (intrinsic.TryDispatch(invoke)) {
    return;
  }

  HandleInvoke(invoke);
}

void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
    return;
  }

  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
  DCHECK(!codegen_->IsLeafMethod());
}

void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
  HandleInvoke(invoke);
  // Add the hidden argument.
  invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
}

void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
  LocationSummary* locations = invoke->GetLocations();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
  Location receiver = locations->InAt(0);
  size_t class_offset = mirror::Object::ClassOffset().SizeValue();

  // Set the hidden argument. It is safe to do so here, as RAX
  // won't be modified thereafter, before the `call` instruction.
  DCHECK_EQ(RAX, hidden_reg.AsRegister());
  codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());

  if (receiver.IsStackSlot()) {
    __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
    // /* HeapReference<Class> */ temp = temp->klass_
    __ movl(temp, Address(temp, class_offset));
  } else {
    // /* HeapReference<Class> */ temp = receiver->klass_
    __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
  }
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (this
  // guarantee may not hold for future collectors).
  __ MaybeUnpoisonHeapReference(temp);
  // temp = temp->GetAddressOfIMT()
  __ movq(temp,
          Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
      invoke->GetImtIndex(), kX86_64PointerSize));
  // temp = temp->GetImtEntryAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(
      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));

  DCHECK(!codegen_->IsLeafMethod());
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
  HandleInvoke(invoke);
}

void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
  codegen_->GenerateInvokePolymorphicCall(invoke);
}

void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
  switch (neg->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      locations->AddTemp(Location::RequiresFpuRegister());
      break;

    default:
      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
  }
}

void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
  LocationSummary* locations = neg->GetLocations();
  Location out = locations->Out();
  Location in = locations->InAt(0);
  switch (neg->GetResultType()) {
    case Primitive::kPrimInt:
      DCHECK(in.IsRegister());
      DCHECK(in.Equals(out));
      __ negl(out.AsRegister<CpuRegister>());
      break;

    case Primitive::kPrimLong:
      DCHECK(in.IsRegister());
      DCHECK(in.Equals(out));
      __ negq(out.AsRegister<CpuRegister>());
      break;

    case Primitive::kPrimFloat: {
      DCHECK(in.Equals(out));
      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      // Implement float negation with an exclusive or with value
      // 0x80000000 (mask for bit 31, representing the sign of a
      // single-precision floating-point number).
      __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
      __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
      break;
    }

    case Primitive::kPrimDouble: {
      DCHECK(in.Equals(out));
      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      // Implement double negation with an exclusive or with value
      // 0x8000000000000000 (mask for bit 63, representing the sign of
      // a double-precision floating-point number).
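      // e.g. this maps 1.0 to -1.0 and +0.0 to -0.0; a subtraction from zero
      // would get +0.0 wrong (0.0 - 0.0 is +0.0) and could quieten NaNs.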
2514 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000))); 2515 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); 2516 break; 2517 } 2518 2519 default: 2520 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 2521 } 2522 } 2523 2524 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { 2525 LocationSummary* locations = 2526 new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); 2527 Primitive::Type result_type = conversion->GetResultType(); 2528 Primitive::Type input_type = conversion->GetInputType(); 2529 DCHECK_NE(result_type, input_type); 2530 2531 // The Java language does not allow treating boolean as an integral type but 2532 // our bit representation makes it safe. 2533 2534 switch (result_type) { 2535 case Primitive::kPrimByte: 2536 switch (input_type) { 2537 case Primitive::kPrimLong: 2538 // Type conversion from long to byte is a result of code transformations. 2539 case Primitive::kPrimBoolean: 2540 // Boolean input is a result of code transformations. 2541 case Primitive::kPrimShort: 2542 case Primitive::kPrimInt: 2543 case Primitive::kPrimChar: 2544 // Processing a Dex `int-to-byte' instruction. 2545 locations->SetInAt(0, Location::Any()); 2546 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2547 break; 2548 2549 default: 2550 LOG(FATAL) << "Unexpected type conversion from " << input_type 2551 << " to " << result_type; 2552 } 2553 break; 2554 2555 case Primitive::kPrimShort: 2556 switch (input_type) { 2557 case Primitive::kPrimLong: 2558 // Type conversion from long to short is a result of code transformations. 2559 case Primitive::kPrimBoolean: 2560 // Boolean input is a result of code transformations. 2561 case Primitive::kPrimByte: 2562 case Primitive::kPrimInt: 2563 case Primitive::kPrimChar: 2564 // Processing a Dex `int-to-short' instruction. 2565 locations->SetInAt(0, Location::Any()); 2566 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2567 break; 2568 2569 default: 2570 LOG(FATAL) << "Unexpected type conversion from " << input_type 2571 << " to " << result_type; 2572 } 2573 break; 2574 2575 case Primitive::kPrimInt: 2576 switch (input_type) { 2577 case Primitive::kPrimLong: 2578 // Processing a Dex `long-to-int' instruction. 2579 locations->SetInAt(0, Location::Any()); 2580 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2581 break; 2582 2583 case Primitive::kPrimFloat: 2584 // Processing a Dex `float-to-int' instruction. 2585 locations->SetInAt(0, Location::RequiresFpuRegister()); 2586 locations->SetOut(Location::RequiresRegister()); 2587 break; 2588 2589 case Primitive::kPrimDouble: 2590 // Processing a Dex `double-to-int' instruction. 2591 locations->SetInAt(0, Location::RequiresFpuRegister()); 2592 locations->SetOut(Location::RequiresRegister()); 2593 break; 2594 2595 default: 2596 LOG(FATAL) << "Unexpected type conversion from " << input_type 2597 << " to " << result_type; 2598 } 2599 break; 2600 2601 case Primitive::kPrimLong: 2602 switch (input_type) { 2603 case Primitive::kPrimBoolean: 2604 // Boolean input is a result of code transformations. 2605 case Primitive::kPrimByte: 2606 case Primitive::kPrimShort: 2607 case Primitive::kPrimInt: 2608 case Primitive::kPrimChar: 2609 // Processing a Dex `int-to-long' instruction. 2610 // TODO: We would benefit from a (to-be-implemented) 2611 // Location::RegisterOrStackSlot requirement for this input. 
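          // (movsxd also accepts an m32 source, so a stack-slot input would
          // save the register copy.)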
2612 locations->SetInAt(0, Location::RequiresRegister()); 2613 locations->SetOut(Location::RequiresRegister()); 2614 break; 2615 2616 case Primitive::kPrimFloat: 2617 // Processing a Dex `float-to-long' instruction. 2618 locations->SetInAt(0, Location::RequiresFpuRegister()); 2619 locations->SetOut(Location::RequiresRegister()); 2620 break; 2621 2622 case Primitive::kPrimDouble: 2623 // Processing a Dex `double-to-long' instruction. 2624 locations->SetInAt(0, Location::RequiresFpuRegister()); 2625 locations->SetOut(Location::RequiresRegister()); 2626 break; 2627 2628 default: 2629 LOG(FATAL) << "Unexpected type conversion from " << input_type 2630 << " to " << result_type; 2631 } 2632 break; 2633 2634 case Primitive::kPrimChar: 2635 switch (input_type) { 2636 case Primitive::kPrimLong: 2637 // Type conversion from long to char is a result of code transformations. 2638 case Primitive::kPrimBoolean: 2639 // Boolean input is a result of code transformations. 2640 case Primitive::kPrimByte: 2641 case Primitive::kPrimShort: 2642 case Primitive::kPrimInt: 2643 // Processing a Dex `int-to-char' instruction. 2644 locations->SetInAt(0, Location::Any()); 2645 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2646 break; 2647 2648 default: 2649 LOG(FATAL) << "Unexpected type conversion from " << input_type 2650 << " to " << result_type; 2651 } 2652 break; 2653 2654 case Primitive::kPrimFloat: 2655 switch (input_type) { 2656 case Primitive::kPrimBoolean: 2657 // Boolean input is a result of code transformations. 2658 case Primitive::kPrimByte: 2659 case Primitive::kPrimShort: 2660 case Primitive::kPrimInt: 2661 case Primitive::kPrimChar: 2662 // Processing a Dex `int-to-float' instruction. 2663 locations->SetInAt(0, Location::Any()); 2664 locations->SetOut(Location::RequiresFpuRegister()); 2665 break; 2666 2667 case Primitive::kPrimLong: 2668 // Processing a Dex `long-to-float' instruction. 2669 locations->SetInAt(0, Location::Any()); 2670 locations->SetOut(Location::RequiresFpuRegister()); 2671 break; 2672 2673 case Primitive::kPrimDouble: 2674 // Processing a Dex `double-to-float' instruction. 2675 locations->SetInAt(0, Location::Any()); 2676 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2677 break; 2678 2679 default: 2680 LOG(FATAL) << "Unexpected type conversion from " << input_type 2681 << " to " << result_type; 2682 }; 2683 break; 2684 2685 case Primitive::kPrimDouble: 2686 switch (input_type) { 2687 case Primitive::kPrimBoolean: 2688 // Boolean input is a result of code transformations. 2689 case Primitive::kPrimByte: 2690 case Primitive::kPrimShort: 2691 case Primitive::kPrimInt: 2692 case Primitive::kPrimChar: 2693 // Processing a Dex `int-to-double' instruction. 2694 locations->SetInAt(0, Location::Any()); 2695 locations->SetOut(Location::RequiresFpuRegister()); 2696 break; 2697 2698 case Primitive::kPrimLong: 2699 // Processing a Dex `long-to-double' instruction. 2700 locations->SetInAt(0, Location::Any()); 2701 locations->SetOut(Location::RequiresFpuRegister()); 2702 break; 2703 2704 case Primitive::kPrimFloat: 2705 // Processing a Dex `float-to-double' instruction. 
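          // Any location works: cvtss2sd takes a register or m32 source, and
          // constant inputs are widened at compile time.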
2706 locations->SetInAt(0, Location::Any()); 2707 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2708 break; 2709 2710 default: 2711 LOG(FATAL) << "Unexpected type conversion from " << input_type 2712 << " to " << result_type; 2713 } 2714 break; 2715 2716 default: 2717 LOG(FATAL) << "Unexpected type conversion from " << input_type 2718 << " to " << result_type; 2719 } 2720 } 2721 2722 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) { 2723 LocationSummary* locations = conversion->GetLocations(); 2724 Location out = locations->Out(); 2725 Location in = locations->InAt(0); 2726 Primitive::Type result_type = conversion->GetResultType(); 2727 Primitive::Type input_type = conversion->GetInputType(); 2728 DCHECK_NE(result_type, input_type); 2729 switch (result_type) { 2730 case Primitive::kPrimByte: 2731 switch (input_type) { 2732 case Primitive::kPrimLong: 2733 // Type conversion from long to byte is a result of code transformations. 2734 case Primitive::kPrimBoolean: 2735 // Boolean input is a result of code transformations. 2736 case Primitive::kPrimShort: 2737 case Primitive::kPrimInt: 2738 case Primitive::kPrimChar: 2739 // Processing a Dex `int-to-byte' instruction. 2740 if (in.IsRegister()) { 2741 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2742 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2743 __ movsxb(out.AsRegister<CpuRegister>(), 2744 Address(CpuRegister(RSP), in.GetStackIndex())); 2745 } else { 2746 __ movl(out.AsRegister<CpuRegister>(), 2747 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant())))); 2748 } 2749 break; 2750 2751 default: 2752 LOG(FATAL) << "Unexpected type conversion from " << input_type 2753 << " to " << result_type; 2754 } 2755 break; 2756 2757 case Primitive::kPrimShort: 2758 switch (input_type) { 2759 case Primitive::kPrimLong: 2760 // Type conversion from long to short is a result of code transformations. 2761 case Primitive::kPrimBoolean: 2762 // Boolean input is a result of code transformations. 2763 case Primitive::kPrimByte: 2764 case Primitive::kPrimInt: 2765 case Primitive::kPrimChar: 2766 // Processing a Dex `int-to-short' instruction. 2767 if (in.IsRegister()) { 2768 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2769 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2770 __ movsxw(out.AsRegister<CpuRegister>(), 2771 Address(CpuRegister(RSP), in.GetStackIndex())); 2772 } else { 2773 __ movl(out.AsRegister<CpuRegister>(), 2774 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant())))); 2775 } 2776 break; 2777 2778 default: 2779 LOG(FATAL) << "Unexpected type conversion from " << input_type 2780 << " to " << result_type; 2781 } 2782 break; 2783 2784 case Primitive::kPrimInt: 2785 switch (input_type) { 2786 case Primitive::kPrimLong: 2787 // Processing a Dex `long-to-int' instruction. 2788 if (in.IsRegister()) { 2789 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2790 } else if (in.IsDoubleStackSlot()) { 2791 __ movl(out.AsRegister<CpuRegister>(), 2792 Address(CpuRegister(RSP), in.GetStackIndex())); 2793 } else { 2794 DCHECK(in.IsConstant()); 2795 DCHECK(in.GetConstant()->IsLongConstant()); 2796 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2797 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); 2798 } 2799 break; 2800 2801 case Primitive::kPrimFloat: { 2802 // Processing a Dex `float-to-int' instruction. 
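          // Java requires NaN -> 0 and saturation at the int range. The explicit
          // compare below handles input >= (float)INT_MAX, since that bound rounds
          // up to 2^31 and cvttss2si would return the indefinite value 0x80000000
          // (== INT_MIN); too-small inputs already truncate to INT_MIN, matching
          // the Java result.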
          XmmRegister input = in.AsFpuRegister<XmmRegister>();
          CpuRegister output = out.AsRegister<CpuRegister>();
          NearLabel done, nan;

          __ movl(output, Immediate(kPrimIntMax));
          // if input >= (float)INT_MAX goto done
          __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
          __ j(kAboveEqual, &done);
          // if input == NaN goto nan
          __ j(kUnordered, &nan);
          // output = float-to-int-truncate(input)
          __ cvttss2si(output, input, false);
          __ jmp(&done);
          __ Bind(&nan);
          // output = 0
          __ xorl(output, output);
          __ Bind(&done);
          break;
        }

        case Primitive::kPrimDouble: {
          // Processing a Dex `double-to-int' instruction.
          XmmRegister input = in.AsFpuRegister<XmmRegister>();
          CpuRegister output = out.AsRegister<CpuRegister>();
          NearLabel done, nan;

          __ movl(output, Immediate(kPrimIntMax));
          // if input >= (double)INT_MAX goto done
          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
          __ j(kAboveEqual, &done);
          // if input == NaN goto nan
          __ j(kUnordered, &nan);
          // output = double-to-int-truncate(input)
          __ cvttsd2si(output, input);
          __ jmp(&done);
          __ Bind(&nan);
          // output = 0
          __ xorl(output, output);
          __ Bind(&done);
          break;
        }

        default:
          LOG(FATAL) << "Unexpected type conversion from " << input_type
                     << " to " << result_type;
      }
      break;

    case Primitive::kPrimLong:
      DCHECK(out.IsRegister());
      switch (input_type) {
        case Primitive::kPrimBoolean:
          // Boolean input is a result of code transformations.
        case Primitive::kPrimByte:
        case Primitive::kPrimShort:
        case Primitive::kPrimInt:
        case Primitive::kPrimChar:
          // Processing a Dex `int-to-long' instruction.
          DCHECK(in.IsRegister());
          __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
          break;

        case Primitive::kPrimFloat: {
          // Processing a Dex `float-to-long' instruction.
          XmmRegister input = in.AsFpuRegister<XmmRegister>();
          CpuRegister output = out.AsRegister<CpuRegister>();
          NearLabel done, nan;

          codegen_->Load64BitValue(output, kPrimLongMax);
          // if input >= (float)LONG_MAX goto done
          __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
          __ j(kAboveEqual, &done);
          // if input == NaN goto nan
          __ j(kUnordered, &nan);
          // output = float-to-long-truncate(input)
          __ cvttss2si(output, input, true);
          __ jmp(&done);
          __ Bind(&nan);
          // output = 0
          __ xorl(output, output);
          __ Bind(&done);
          break;
        }

        case Primitive::kPrimDouble: {
          // Processing a Dex `double-to-long' instruction.
2889 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2890 CpuRegister output = out.AsRegister<CpuRegister>(); 2891 NearLabel done, nan; 2892 2893 codegen_->Load64BitValue(output, kPrimLongMax); 2894 // if input >= (double)LONG_MAX goto done 2895 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax)); 2896 __ j(kAboveEqual, &done); 2897 // if input == NaN goto nan 2898 __ j(kUnordered, &nan); 2899 // output = double-to-long-truncate(input) 2900 __ cvttsd2si(output, input, true); 2901 __ jmp(&done); 2902 __ Bind(&nan); 2903 // output = 0 2904 __ xorl(output, output); 2905 __ Bind(&done); 2906 break; 2907 } 2908 2909 default: 2910 LOG(FATAL) << "Unexpected type conversion from " << input_type 2911 << " to " << result_type; 2912 } 2913 break; 2914 2915 case Primitive::kPrimChar: 2916 switch (input_type) { 2917 case Primitive::kPrimLong: 2918 // Type conversion from long to char is a result of code transformations. 2919 case Primitive::kPrimBoolean: 2920 // Boolean input is a result of code transformations. 2921 case Primitive::kPrimByte: 2922 case Primitive::kPrimShort: 2923 case Primitive::kPrimInt: 2924 // Processing a Dex `int-to-char' instruction. 2925 if (in.IsRegister()) { 2926 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2927 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2928 __ movzxw(out.AsRegister<CpuRegister>(), 2929 Address(CpuRegister(RSP), in.GetStackIndex())); 2930 } else { 2931 __ movl(out.AsRegister<CpuRegister>(), 2932 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant())))); 2933 } 2934 break; 2935 2936 default: 2937 LOG(FATAL) << "Unexpected type conversion from " << input_type 2938 << " to " << result_type; 2939 } 2940 break; 2941 2942 case Primitive::kPrimFloat: 2943 switch (input_type) { 2944 case Primitive::kPrimBoolean: 2945 // Boolean input is a result of code transformations. 2946 case Primitive::kPrimByte: 2947 case Primitive::kPrimShort: 2948 case Primitive::kPrimInt: 2949 case Primitive::kPrimChar: 2950 // Processing a Dex `int-to-float' instruction. 2951 if (in.IsRegister()) { 2952 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); 2953 } else if (in.IsConstant()) { 2954 int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); 2955 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 2956 codegen_->Load32BitValue(dest, static_cast<float>(v)); 2957 } else { 2958 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), 2959 Address(CpuRegister(RSP), in.GetStackIndex()), false); 2960 } 2961 break; 2962 2963 case Primitive::kPrimLong: 2964 // Processing a Dex `long-to-float' instruction. 2965 if (in.IsRegister()) { 2966 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); 2967 } else if (in.IsConstant()) { 2968 int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); 2969 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 2970 codegen_->Load32BitValue(dest, static_cast<float>(v)); 2971 } else { 2972 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), 2973 Address(CpuRegister(RSP), in.GetStackIndex()), true); 2974 } 2975 break; 2976 2977 case Primitive::kPrimDouble: 2978 // Processing a Dex `double-to-float' instruction. 
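          // cvtsd2ss does the narrowing and rounding; constants are instead
          // narrowed at compile time and loaded directly.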
          if (in.IsFpuRegister()) {
            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
          } else if (in.IsConstant()) {
            double v = in.GetConstant()->AsDoubleConstant()->GetValue();
            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
            codegen_->Load32BitValue(dest, static_cast<float>(v));
          } else {
            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
                        Address(CpuRegister(RSP), in.GetStackIndex()));
          }
          break;

        default:
          LOG(FATAL) << "Unexpected type conversion from " << input_type
                     << " to " << result_type;
      }
      break;

    case Primitive::kPrimDouble:
      switch (input_type) {
        case Primitive::kPrimBoolean:
          // Boolean input is a result of code transformations.
        case Primitive::kPrimByte:
        case Primitive::kPrimShort:
        case Primitive::kPrimInt:
        case Primitive::kPrimChar:
          // Processing a Dex `int-to-double' instruction.
          if (in.IsRegister()) {
            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
          } else if (in.IsConstant()) {
            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
            codegen_->Load64BitValue(dest, static_cast<double>(v));
          } else {
            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
          }
          break;

        case Primitive::kPrimLong:
          // Processing a Dex `long-to-double' instruction.
          if (in.IsRegister()) {
            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
          } else if (in.IsConstant()) {
            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
            codegen_->Load64BitValue(dest, static_cast<double>(v));
          } else {
            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
          }
          break;

        case Primitive::kPrimFloat:
          // Processing a Dex `float-to-double' instruction.
          if (in.IsFpuRegister()) {
            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
          } else if (in.IsConstant()) {
            float v = in.GetConstant()->AsFloatConstant()->GetValue();
            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
            codegen_->Load64BitValue(dest, static_cast<double>(v));
          } else {
            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
                        Address(CpuRegister(RSP), in.GetStackIndex()));
          }
          break;

        default:
          LOG(FATAL) << "Unexpected type conversion from " << input_type
                     << " to " << result_type;
      }
      break;

    default:
      LOG(FATAL) << "Unexpected type conversion from " << input_type
                 << " to " << result_type;
  }
}

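// Editorial note: the conversion sequences above are needed because x86
// cvttss2si/cvttsd2si do not match Java semantics on their own: on overflow or
// NaN they produce the "integer indefinite" value (0x80000000 or
// 0x8000000000000000), whereas Java requires saturation to MAX_VALUE for large
// positive inputs and 0 for NaN. The emitted code corresponds roughly to this
// illustrative sketch (the helper name is ours, not part of the generator):
//
//   int32_t FloatToIntJavaSemantics(float in) {
//     if (in >= static_cast<float>(kPrimIntMax)) return kPrimIntMax;  // kAboveEqual
//     if (in != in) return 0;                                         // kUnordered (NaN)
//     return static_cast<int32_t>(in);                                // cvttss2si
//   }
//
// The negative overflow case needs no extra code: the indefinite value
// 0x80000000 already equals Integer.MIN_VALUE, which is the required result.
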
void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
  switch (add->GetResultType()) {
    case Primitive::kPrimInt: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;
    }

    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      // We can use a leaq or addq if the constant can fit in an immediate.
      locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;
    }

    case Primitive::kPrimDouble:
    case Primitive::kPrimFloat: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }

    default:
      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
  }
}

void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
  LocationSummary* locations = add->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  Location out = locations->Out();

  switch (add->GetResultType()) {
    case Primitive::kPrimInt: {
      if (second.IsRegister()) {
        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
          __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
        } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
          __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
        } else {
          __ leal(out.AsRegister<CpuRegister>(), Address(
              first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
        }
      } else if (second.IsConstant()) {
        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
          __ addl(out.AsRegister<CpuRegister>(),
                  Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ leal(out.AsRegister<CpuRegister>(), Address(
              first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
        }
      } else {
        DCHECK(first.Equals(locations->Out()));
        __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    case Primitive::kPrimLong: {
      if (second.IsRegister()) {
        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
          __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
        } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
          __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
        } else {
          __ leaq(out.AsRegister<CpuRegister>(), Address(
              first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
        }
      } else {
        DCHECK(second.IsConstant());
        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
        int32_t int32_value = Low32Bits(value);
        DCHECK_EQ(int32_value, value);
        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
          __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
        } else {
          __ leaq(out.AsRegister<CpuRegister>(), Address(
              first.AsRegister<CpuRegister>(), int32_value));
        }
      }
      break;
    }

    case Primitive::kPrimFloat: {
      if (second.IsFpuRegister()) {
        __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
      } else if (second.IsConstant()) {
        __ addss(first.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralFloatAddress(
                     second.GetConstant()->AsFloatConstant()->GetValue()));
      } else {
        DCHECK(second.IsStackSlot());
        __ addss(first.AsFpuRegister<XmmRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    case Primitive::kPrimDouble: {
      if (second.IsFpuRegister()) {
        __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
      } else if (second.IsConstant()) {
        __ addsd(first.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralDoubleAddress(
                     second.GetConstant()->AsDoubleConstant()->GetValue()));
      } else {
        DCHECK(second.IsDoubleStackSlot());
        __ addsd(first.AsFpuRegister<XmmRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
  }
}

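// Editorial note: VisitAdd above chooses between two instruction shapes.
// addl/addq is destructive (dst += src), so it is only used when the register
// allocator placed the output in one of the inputs; otherwise a single
// leal/leaq computes first + second (or first + imm) into a third register
// without disturbing the inputs or the flags. With out = ECX, first = EAX and
// second = EBX, the emitted code is roughly:
//
//   leal (%eax, %ebx, 1), %ecx   // ecx = eax + ebx, inputs preserved
//
// This is also why the locations builder asked for kNoOutputOverlap outputs
// for integral adds: any output register works, lea covers the disjoint case.
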
void LocationsBuilderX86_64::VisitSub(HSub* sub) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
  switch (sub->GetResultType()) {
    case Primitive::kPrimInt: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
  }
}

void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
  LocationSummary* locations = sub->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  DCHECK(first.Equals(locations->Out()));
  switch (sub->GetResultType()) {
    case Primitive::kPrimInt: {
      if (second.IsRegister()) {
        __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
      } else if (second.IsConstant()) {
        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
        __ subl(first.AsRegister<CpuRegister>(), imm);
      } else {
        __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }
    case Primitive::kPrimLong: {
      if (second.IsConstant()) {
        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(value));
        __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
      } else {
        __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
      }
      break;
    }

    case Primitive::kPrimFloat: {
      if (second.IsFpuRegister()) {
        __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
      } else if (second.IsConstant()) {
        __ subss(first.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralFloatAddress(
                     second.GetConstant()->AsFloatConstant()->GetValue()));
      } else {
        DCHECK(second.IsStackSlot());
        __ subss(first.AsFpuRegister<XmmRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    case Primitive::kPrimDouble: {
      if (second.IsFpuRegister()) {
        __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
      } else if (second.IsConstant()) {
        __ subsd(first.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralDoubleAddress(
                     second.GetConstant()->AsDoubleConstant()->GetValue()));
      } else {
        DCHECK(second.IsDoubleStackSlot());
        __ subsd(first.AsFpuRegister<XmmRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
  }
}

void LocationsBuilderX86_64::VisitMul(HMul* mul) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
  switch (mul->GetResultType()) {
    case Primitive::kPrimInt: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::Any());
      if (mul->InputAt(1)->IsIntConstant()) {
        // Can use 3 operand multiply.
        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      } else {
        locations->SetOut(Location::SameAsFirstInput());
      }
      break;
    }
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::Any());
      if (mul->InputAt(1)->IsLongConstant() &&
          IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
        // Can use 3 operand multiply.
        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      } else {
        locations->SetOut(Location::SameAsFirstInput());
      }
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }

    default:
      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
  }
}

void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
  LocationSummary* locations = mul->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  Location out = locations->Out();
  switch (mul->GetResultType()) {
    case Primitive::kPrimInt:
      // The constant may have ended up in a register, so test explicitly to avoid
      // problems where the output may not be the same as the first operand.
      if (mul->InputAt(1)->IsIntConstant()) {
        Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
        __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
      } else if (second.IsRegister()) {
        DCHECK(first.Equals(out));
        __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
      } else {
        DCHECK(first.Equals(out));
        DCHECK(second.IsStackSlot());
        __ imull(first.AsRegister<CpuRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    case Primitive::kPrimLong: {
      // The constant may have ended up in a register, so test explicitly to avoid
      // problems where the output may not be the same as the first operand.
      if (mul->InputAt(1)->IsLongConstant()) {
        int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
        if (IsInt<32>(value)) {
          __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
                   Immediate(static_cast<int32_t>(value)));
        } else {
          // Have to use the constant area.
          DCHECK(first.Equals(out));
          __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
        }
      } else if (second.IsRegister()) {
        DCHECK(first.Equals(out));
        __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
      } else {
        DCHECK(second.IsDoubleStackSlot());
        DCHECK(first.Equals(out));
        __ imulq(first.AsRegister<CpuRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    case Primitive::kPrimFloat: {
      DCHECK(first.Equals(out));
      if (second.IsFpuRegister()) {
        __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
      } else if (second.IsConstant()) {
        __ mulss(first.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralFloatAddress(
                     second.GetConstant()->AsFloatConstant()->GetValue()));
      } else {
        DCHECK(second.IsStackSlot());
        __ mulss(first.AsFpuRegister<XmmRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    case Primitive::kPrimDouble: {
      DCHECK(first.Equals(out));
      if (second.IsFpuRegister()) {
        __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
      } else if (second.IsConstant()) {
        __ mulsd(first.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralDoubleAddress(
                     second.GetConstant()->AsDoubleConstant()->GetValue()));
      } else {
        DCHECK(second.IsDoubleStackSlot());
        __ mulsd(first.AsFpuRegister<XmmRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
  }
}

void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
                                                     uint32_t stack_adjustment, bool is_float) {
  if (source.IsStackSlot()) {
    DCHECK(is_float);
    __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
  } else if (source.IsDoubleStackSlot()) {
    DCHECK(!is_float);
    __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
  } else {
    // Write the value to the temporary location on the stack and load to FP stack.
    if (is_float) {
      Location stack_temp = Location::StackSlot(temp_offset);
      codegen_->Move(stack_temp, source);
      __ flds(Address(CpuRegister(RSP), temp_offset));
    } else {
      Location stack_temp = Location::DoubleStackSlot(temp_offset);
      codegen_->Move(stack_temp, source);
      __ fldl(Address(CpuRegister(RSP), temp_offset));
    }
  }
}

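// Editorial note: GenerateRemFP below computes float/double remainder on the
// legacy x87 unit, because fprem yields the remainder of a truncated division,
// which is what Java's % operator requires, and SSE has no equivalent
// instruction. fprem is only a partial remainder: for operands with very
// different magnitudes it reduces the exponent difference by at most 63 per
// pass and reports "incomplete" through the C2 status flag, hence the retry
// loop. In outline:
//
//   st1 = divisor; st0 = dividend;
//   do {
//     st0 = fprem(st0, st1);            // partial remainder
//   } while (FPU status word C2 != 0);  // reduction not finished yet
//
// The operands travel through memory because the values live in XMM registers,
// which the x87 stack cannot read directly.
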
void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
  Primitive::Type type = rem->GetResultType();
  bool is_float = type == Primitive::kPrimFloat;
  size_t elem_size = Primitive::ComponentSize(type);
  LocationSummary* locations = rem->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  Location out = locations->Out();

  // Create stack space for 2 elements.
  // TODO: enhance register allocator to ask for stack temporaries.
  __ subq(CpuRegister(RSP), Immediate(2 * elem_size));

  // Load the values to the FP stack in reverse order, using temporaries if needed.
  PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
  PushOntoFPStack(first, 0, 2 * elem_size, is_float);

  // Loop doing FPREM until we stabilize.
  NearLabel retry;
  __ Bind(&retry);
  __ fprem();

  // Move FP status to AX.
  __ fstsw();

  // And see if the argument reduction is complete. This is signaled by the
  // C2 FPU flag bit set to 0.
  __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
  __ j(kNotEqual, &retry);

  // We have settled on the final value. Retrieve it into an XMM register.
  // Store FP top of stack to real stack.
  if (is_float) {
    __ fsts(Address(CpuRegister(RSP), 0));
  } else {
    __ fstl(Address(CpuRegister(RSP), 0));
  }

  // Pop the 2 items from the FP stack.
  __ fucompp();

  // Load the value from the stack into an XMM register.
  DCHECK(out.IsFpuRegister()) << out;
  if (is_float) {
    __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
  } else {
    __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
  }

  // And remove the temporary stack space we allocated.
  __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
}

void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
  CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
  int64_t imm = Int64FromConstant(second.GetConstant());

  DCHECK(imm == 1 || imm == -1);

  switch (instruction->GetResultType()) {
    case Primitive::kPrimInt: {
      if (instruction->IsRem()) {
        __ xorl(output_register, output_register);
      } else {
        __ movl(output_register, input_register);
        if (imm == -1) {
          __ negl(output_register);
        }
      }
      break;
    }

    case Primitive::kPrimLong: {
      if (instruction->IsRem()) {
        __ xorl(output_register, output_register);
      } else {
        __ movq(output_register, input_register);
        if (imm == -1) {
          __ negq(output_register);
        }
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
  }
}

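// Editorial note: DivByPowerOfTwo below cannot use a bare arithmetic shift,
// because sar rounds toward negative infinity while Java division rounds
// toward zero. Adding |imm| - 1 to negative numerators first biases the shift
// into truncation toward zero; the cmov applies the bias only when the
// numerator is negative. Worked example for imm = 4 (shift = 2):
//
//   n = -7:  (-7 + 3) >> 2  ==  -4 >> 2  ==  -1   // correct: -7 / 4 == -1
//   n = -7:        -7 >> 2  ==  -2                // what a bare sar would give
//   n =  7:         7 >> 2  ==   1                // non-negative: no bias
//
// The trailing neg restores the quotient's sign for negative divisors.
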
void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);

  CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
  CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();

  int64_t imm = Int64FromConstant(second.GetConstant());
  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
  uint64_t abs_imm = AbsOrMin(imm);

  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (instruction->GetResultType() == Primitive::kPrimInt) {
    __ leal(tmp, Address(numerator, abs_imm - 1));
    __ testl(numerator, numerator);
    __ cmov(kGreaterEqual, tmp, numerator);
    int shift = CTZ(imm);
    __ sarl(tmp, Immediate(shift));

    if (imm < 0) {
      __ negl(tmp);
    }

    __ movl(output_register, tmp);
  } else {
    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
    CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();

    codegen_->Load64BitValue(rdx, abs_imm - 1);
    __ addq(rdx, numerator);
    __ testq(numerator, numerator);
    __ cmov(kGreaterEqual, rdx, numerator);
    int shift = CTZ(imm);
    __ sarq(rdx, Immediate(shift));

    if (imm < 0) {
      __ negq(rdx);
    }

    __ movq(output_register, rdx);
  }
}

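// Editorial note: GenerateDivRemWithAnyConstant below uses the classic "magic
// number" technique (Hacker's Delight ch. 10, also used by gcc and llvm): a
// signed division by a constant d becomes a widening multiply by a precomputed
// reciprocal (32x32->64 for int, 64x64->128 for long), an optional correction
// add/sub when the magic constant and d have opposite signs, an arithmetic
// shift, and adding the sign bit to round toward zero.
// CalculateMagicAndShiftForDivRem supplies magic and shift. Illustrative int
// walk-through for d = 7, whose standard constants are magic = 0x92492493 and
// shift = 2:
//
//   n = 21:  hi32(magic * 21)  ==  -9   // signed high half of the product
//            -9 + 21           ==  12   // magic < 0, d > 0: add numerator
//            12 >> 2           ==   3   // arithmetic shift
//            3 + (3 >>> 31)    ==   3   // add sign bit: 21 / 7 == 3
//
// For rem, the quotient is multiplied back and subtracted: n - q * d.
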
void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);

  CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
                                               : locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
                                         : locations->Out().AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  DCHECK_EQ(RAX, eax.AsRegister());
  DCHECK_EQ(RDX, edx.AsRegister());
  if (instruction->IsDiv()) {
    DCHECK_EQ(RAX, out.AsRegister());
  } else {
    DCHECK_EQ(RDX, out.AsRegister());
  }

  int64_t magic;
  int shift;

  // TODO: can these branches be written as one?
  if (instruction->GetResultType() == Primitive::kPrimInt) {
    int imm = second.GetConstant()->AsIntConstant()->GetValue();

    CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);

    __ movl(numerator, eax);

    __ movl(eax, Immediate(magic));
    __ imull(numerator);

    if (imm > 0 && magic < 0) {
      __ addl(edx, numerator);
    } else if (imm < 0 && magic > 0) {
      __ subl(edx, numerator);
    }

    if (shift != 0) {
      __ sarl(edx, Immediate(shift));
    }

    __ movl(eax, edx);
    __ shrl(edx, Immediate(31));
    __ addl(edx, eax);

    if (instruction->IsRem()) {
      __ movl(eax, numerator);
      __ imull(edx, Immediate(imm));
      __ subl(eax, edx);
      __ movl(edx, eax);
    } else {
      __ movl(eax, edx);
    }
  } else {
    int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();

    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);

    CpuRegister rax = eax;
    CpuRegister rdx = edx;

    CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);

    // Save the numerator.
    __ movq(numerator, rax);

    // RAX = magic
    codegen_->Load64BitValue(rax, magic);

    // RDX:RAX = magic * numerator
    __ imulq(numerator);

    if (imm > 0 && magic < 0) {
      // RDX += numerator
      __ addq(rdx, numerator);
    } else if (imm < 0 && magic > 0) {
      // RDX -= numerator
      __ subq(rdx, numerator);
    }

    // Shift if needed.
    if (shift != 0) {
      __ sarq(rdx, Immediate(shift));
    }

    // RDX += 1 if RDX < 0
    __ movq(rax, rdx);
    __ shrq(rdx, Immediate(63));
    __ addq(rdx, rax);

    if (instruction->IsRem()) {
      __ movq(rax, numerator);

      if (IsInt<32>(imm)) {
        __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
      } else {
        __ imulq(rdx, codegen_->LiteralInt64Address(imm));
      }

      __ subq(rax, rdx);
      __ movq(rdx, rax);
    } else {
      __ movq(rax, rdx);
    }
  }
}

void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());
  Primitive::Type type = instruction->GetResultType();
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  bool is_div = instruction->IsDiv();
  LocationSummary* locations = instruction->GetLocations();

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  Location second = locations->InAt(1);

  DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
  DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());

  if (second.IsConstant()) {
    int64_t imm = Int64FromConstant(second.GetConstant());

    if (imm == 0) {
      // Do not generate anything. A DivZeroCheck would prevent any code from being executed.
    } else if (imm == 1 || imm == -1) {
      DivRemOneOrMinusOne(instruction);
    } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
      DivByPowerOfTwo(instruction->AsDiv());
    } else {
      DCHECK(imm <= -2 || imm >= 2);
      GenerateDivRemWithAnyConstant(instruction);
    }
  } else {
    SlowPathCode* slow_path =
        new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
            instruction, out.AsRegister(), type, is_div);
    codegen_->AddSlowPath(slow_path);

    CpuRegister second_reg = second.AsRegister<CpuRegister>();
    // 0x80000000(00000000)/-1 triggers an arithmetic exception!
    // Dividing by -1 is actually negation, and -0x80000000(00000000) = 0x80000000(00000000),
    // so it's safe to just use negl instead of more complex comparisons.
    if (type == Primitive::kPrimInt) {
      __ cmpl(second_reg, Immediate(-1));
      __ j(kEqual, slow_path->GetEntryLabel());
      // edx:eax <- sign-extended of eax
      __ cdq();
      // eax = quotient, edx = remainder
      __ idivl(second_reg);
    } else {
      __ cmpq(second_reg, Immediate(-1));
      __ j(kEqual, slow_path->GetEntryLabel());
      // rdx:rax <- sign-extended of rax
      __ cqo();
      // rax = quotient, rdx = remainder
      __ idivq(second_reg);
    }
    __ Bind(slow_path->GetExitLabel());
  }
}

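// Editorial note on the register-divisor path above: idivl/idivq divide the
// double-width dividend edx:eax (rdx:rax) by their operand, so the numerator
// is pinned to RAX and first sign-extended into RDX by cdq/cqo. Afterwards the
// quotient is in RAX and the remainder in RDX, which is why the location
// builders below pin the div output to RAX and the rem output to RDX. The
// explicit compare against -1 exists because the one input the hardware traps
// on (#DE), MIN_VALUE / -1, is well defined in Java (it wraps to MIN_VALUE),
// so that case is diverted to the negation slow path instead.
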
void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
  switch (div->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RegisterLocation(RAX));
      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
      locations->SetOut(Location::SameAsFirstInput());
      // Intel uses edx:eax (rdx:rax for long) as the dividend.
      locations->AddTemp(Location::RegisterLocation(RDX));
      // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
      // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
      // output and request another temp.
      if (div->InputAt(1)->IsConstant()) {
        locations->AddTemp(Location::RequiresRegister());
      }
      break;
    }

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }

    default:
      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
  }
}

void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
  LocationSummary* locations = div->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  DCHECK(first.Equals(locations->Out()));

  Primitive::Type type = div->GetResultType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      GenerateDivRemIntegral(div);
      break;
    }

    case Primitive::kPrimFloat: {
      if (second.IsFpuRegister()) {
        __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
      } else if (second.IsConstant()) {
        __ divss(first.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralFloatAddress(
                     second.GetConstant()->AsFloatConstant()->GetValue()));
      } else {
        DCHECK(second.IsStackSlot());
        __ divss(first.AsFpuRegister<XmmRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    case Primitive::kPrimDouble: {
      if (second.IsFpuRegister()) {
        __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
      } else if (second.IsConstant()) {
        __ divsd(first.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralDoubleAddress(
                     second.GetConstant()->AsDoubleConstant()->GetValue()));
      } else {
        DCHECK(second.IsDoubleStackSlot());
        __ divsd(first.AsFpuRegister<XmmRegister>(),
                 Address(CpuRegister(RSP), second.GetStackIndex()));
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
  }
}

void LocationsBuilderX86_64::VisitRem(HRem* rem) {
  Primitive::Type type = rem->GetResultType();
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RegisterLocation(RAX));
      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
      // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
      locations->SetOut(Location::RegisterLocation(RDX));
      // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
      // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
      // output and request another temp.
      if (rem->InputAt(1)->IsConstant()) {
        locations->AddTemp(Location::RequiresRegister());
      }
      break;
    }

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      locations->SetInAt(0, Location::Any());
      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::RequiresFpuRegister());
      locations->AddTemp(Location::RegisterLocation(RAX));
      break;
    }

    default:
      LOG(FATAL) << "Unexpected rem type " << type;
  }
}

void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
  Primitive::Type type = rem->GetResultType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      GenerateDivRemIntegral(rem);
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      GenerateRemFP(rem);
      break;
    }
    default:
      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
  }
}

void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
  locations->SetInAt(0, Location::Any());
}

void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
  SlowPathCode* slow_path =
      new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
  codegen_->AddSlowPath(slow_path);

  LocationSummary* locations = instruction->GetLocations();
  Location value = locations->InAt(0);

  switch (instruction->GetType()) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt: {
      if (value.IsRegister()) {
        __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
        __ j(kEqual, slow_path->GetEntryLabel());
      } else if (value.IsStackSlot()) {
        __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
        __ j(kEqual, slow_path->GetEntryLabel());
      } else {
        DCHECK(value.IsConstant()) << value;
        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
          __ jmp(slow_path->GetEntryLabel());
        }
      }
      break;
    }
    case Primitive::kPrimLong: {
      if (value.IsRegister()) {
        __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
        __ j(kEqual, slow_path->GetEntryLabel());
      } else if (value.IsDoubleStackSlot()) {
        __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
        __ j(kEqual, slow_path->GetEntryLabel());
      } else {
        DCHECK(value.IsConstant()) << value;
        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
          __ jmp(slow_path->GetEntryLabel());
        }
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
  }
}

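// Editorial note: HandleShift below exploits a convenient agreement between
// Java and x86 semantics. The Java language masks shift distances to the low
// 5 bits for int and 6 bits for long, and shll/sarl/shrl (and their quadword
// forms) mask the count in CL the same way, so register shift counts need no
// extra masking code. Only compile-time constants are masked here, e.g. for
// the int case:
//
//   x << 33  ==  x << (33 & kMaxIntShiftDistance)  ==  x << 1
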
void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());

  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);

  switch (op->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      // The shift count needs to be in CL.
      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
  }
}

void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());

  LocationSummary* locations = op->GetLocations();
  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
  Location second = locations->InAt(1);

  switch (op->GetResultType()) {
    case Primitive::kPrimInt: {
      if (second.IsRegister()) {
        CpuRegister second_reg = second.AsRegister<CpuRegister>();
        if (op->IsShl()) {
          __ shll(first_reg, second_reg);
        } else if (op->IsShr()) {
          __ sarl(first_reg, second_reg);
        } else {
          __ shrl(first_reg, second_reg);
        }
      } else {
        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
        if (op->IsShl()) {
          __ shll(first_reg, imm);
        } else if (op->IsShr()) {
          __ sarl(first_reg, imm);
        } else {
          __ shrl(first_reg, imm);
        }
      }
      break;
    }
    case Primitive::kPrimLong: {
      if (second.IsRegister()) {
        CpuRegister second_reg = second.AsRegister<CpuRegister>();
        if (op->IsShl()) {
          __ shlq(first_reg, second_reg);
        } else if (op->IsShr()) {
          __ sarq(first_reg, second_reg);
        } else {
          __ shrq(first_reg, second_reg);
        }
      } else {
        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
        if (op->IsShl()) {
          __ shlq(first_reg, imm);
        } else if (op->IsShr()) {
          __ sarq(first_reg, imm);
        } else {
          __ shrq(first_reg, imm);
        }
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
      UNREACHABLE();
  }
}

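// Editorial note: HRor maps directly onto the hardware rorl/rorq emitted
// below, so a Java rotate compiles to a single instruction. Rotation loses no
// bits, and the hardware masks the CL count exactly like the shifts above, so
// no extra correction code is needed.
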
void LocationsBuilderX86_64::VisitRor(HRor* ror) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);

  switch (ror->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      // The shift count needs to be in CL (unless it is a constant).
      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
  LocationSummary* locations = ror->GetLocations();
  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
  Location second = locations->InAt(1);

  switch (ror->GetResultType()) {
    case Primitive::kPrimInt:
      if (second.IsRegister()) {
        CpuRegister second_reg = second.AsRegister<CpuRegister>();
        __ rorl(first_reg, second_reg);
      } else {
        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
        __ rorl(first_reg, imm);
      }
      break;
    case Primitive::kPrimLong:
      if (second.IsRegister()) {
        CpuRegister second_reg = second.AsRegister<CpuRegister>();
        __ rorq(first_reg, second_reg);
      } else {
        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
        __ rorq(first_reg, imm);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitShl(HShl* shl) {
  HandleShift(shl);
}

void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
  HandleShift(shl);
}

void LocationsBuilderX86_64::VisitShr(HShr* shr) {
  HandleShift(shr);
}

void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
  HandleShift(shr);
}

void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}

void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}

void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  if (instruction->IsStringAlloc()) {
    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
  } else {
    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  }
  locations->SetOut(Location::RegisterLocation(RAX));
}

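// Editorial note: the StringAlloc path below shows how compiled code reaches a
// Quick runtime entry point. The entry points live in a per-thread table, and
// on x86-64 the %gs segment register is set up to point at the current Thread,
// so gs:[QUICK_ENTRY_POINT(pNewEmptyString)] loads the target without needing
// a Thread* in a general-purpose register. The subsequent call then goes
// through the loaded method's entry-point-from-quick-compiled-code field.
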
void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
  // Note: if heap poisoning is enabled, the entry point takes care
  // of poisoning the reference.
  if (instruction->IsStringAlloc()) {
    // String is allocated through StringFactory. Call NewEmptyString entry point.
    CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
    MemberOffset code_offset =
        ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
    __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
    __ call(Address(temp, code_offset.SizeValue()));
    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
  } else {
    codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    DCHECK(!codegen_->IsLeafMethod());
  }
}

void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetOut(Location::RegisterLocation(RAX));
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}

void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
  // Note: if heap poisoning is enabled, the entry point takes care
  // of poisoning the reference.
  QuickEntrypointEnum entrypoint =
      CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
  codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
  CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
  DCHECK(!codegen_->IsLeafMethod());
}

void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
  if (location.IsStackSlot()) {
    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
  } else if (location.IsDoubleStackSlot()) {
    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
  }
  locations->SetOut(location);
}

void InstructionCodeGeneratorX86_64::VisitParameterValue(
    HParameterValue* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, the parameter is already at its location.
}

void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
}

void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
    HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, the method is already at its location.
}

void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
        instruction->GetIndex(), kX86_64PointerSize).SizeValue();
    __ movq(locations->Out().AsRegister<CpuRegister>(),
            Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
  } else {
    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
        instruction->GetIndex(), kX86_64PointerSize));
    __ movq(locations->Out().AsRegister<CpuRegister>(),
            Address(locations->InAt(0).AsRegister<CpuRegister>(),
                    mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
    __ movq(locations->Out().AsRegister<CpuRegister>(),
            Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
  }
}

void LocationsBuilderX86_64::VisitNot(HNot* not_) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
  LocationSummary* locations = not_->GetLocations();
  DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
            locations->Out().AsRegister<CpuRegister>().AsRegister());
  Location out = locations->Out();
  switch (not_->GetResultType()) {
    case Primitive::kPrimInt:
      __ notl(out.AsRegister<CpuRegister>());
      break;

    case Primitive::kPrimLong:
      __ notq(out.AsRegister<CpuRegister>());
      break;

    default:
      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
  }
}

void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
  LocationSummary* locations = bool_not->GetLocations();
  DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
            locations->Out().AsRegister<CpuRegister>().AsRegister());
  Location out = locations->Out();
  __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
}

void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
    locations->SetInAt(i, Location::Any());
  }
  locations->SetOut(Location::Any());
}

void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unimplemented";
}

void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
  /*
   * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence.
   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   */
  switch (kind) {
    case MemBarrierKind::kAnyAny: {
      MemoryFence();
      break;
    }
    case MemBarrierKind::kAnyStore:
    case MemBarrierKind::kLoadAny:
    case MemBarrierKind::kStoreStore: {
      // nop
      break;
    }
    case MemBarrierKind::kNTStoreStore:
      // Non-Temporal Store/Store needs an explicit fence.
      MemoryFence(/* non-temporal */ true);
      break;
  }
}

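// Editorial note: the mapping above is the JSR-133 cookbook table specialized
// to x86-64's TSO memory model, where the only reordering visible to other
// cores is a store followed by a load from a different address. Hence only
// kAnyAny pays for a real fence; MemoryFence() typically materializes as
// mfence or an equivalent lock-prefixed operation (the exact form is an
// implementation detail of the assembler, not fixed here). The "nop" cases
// still matter: keeping the barrier node stops the compiler itself from
// reordering the surrounding memory operations.
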
void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());

  bool object_field_get_with_read_barrier =
      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction,
                                                   object_field_get_with_read_barrier ?
                                                       LocationSummary::kCallOnSlowPath :
                                                       LocationSummary::kNoCall);
  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::RequiresRegister());
  if (Primitive::IsFloatingPointType(instruction->GetType())) {
    locations->SetOut(Location::RequiresFpuRegister());
  } else {
    // The output overlaps for an object field get when read barriers
    // are enabled: we do not want the move to overwrite the object's
    // location, as we need it to emit the read barrier.
    locations->SetOut(
        Location::RequiresRegister(),
        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
                                                    const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());

  LocationSummary* locations = instruction->GetLocations();
  Location base_loc = locations->InAt(0);
  CpuRegister base = base_loc.AsRegister<CpuRegister>();
  Location out = locations->Out();
  bool is_volatile = field_info.IsVolatile();
  Primitive::Type field_type = field_info.GetFieldType();
  uint32_t offset = field_info.GetFieldOffset().Uint32Value();

  switch (field_type) {
    case Primitive::kPrimBoolean: {
      __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimByte: {
      __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimShort: {
      __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimChar: {
      __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimInt: {
      __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimNot: {
      // /* HeapReference<Object> */ out = *(base + offset)
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // Note that a potential implicit null check is handled in this
        // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            instruction, out, base, offset, /* needs_null_check */ true);
        if (is_volatile) {
          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
        }
      } else {
        __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        if (is_volatile) {
          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
        }
        // If read barriers are enabled, emit read barriers other than
        // Baker's using a slow path (and also unpoison the loaded
        // reference, if heap poisoning is enabled).
        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
      }
      break;
    }

    case Primitive::kPrimLong: {
      __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimFloat: {
      __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimDouble: {
      __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << field_type;
      UNREACHABLE();
  }

  if (field_type == Primitive::kPrimNot) {
    // Potential implicit null checks, in the case of reference
    // fields, are handled in the previous switch statement.
  } else {
    codegen_->MaybeRecordImplicitNullCheck(instruction);
  }

  if (is_volatile) {
    if (field_type == Primitive::kPrimNot) {
      // Memory barriers, in the case of references, are also handled
      // in the previous switch statement.
    } else {
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
    }
  }
}

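// Editorial note: HandleFieldGet above is also the volatile-load path. Under
// x86-64's memory model an ordinary load already has acquire semantics, so a
// volatile get is a plain mov followed by a kLoadAny barrier that, as
// GenerateMemoryBarrier shows, emits no instructions and only pins the
// instruction scheduling. The cost of volatile is paid on the store side
// instead (see the kAnyAny barrier at the end of HandleFieldSet below).
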
void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
                                            const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());

  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  Primitive::Type field_type = field_info.GetFieldType();
  bool is_volatile = field_info.IsVolatile();
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));

  locations->SetInAt(0, Location::RequiresRegister());
  if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
    if (is_volatile) {
      // In order to satisfy the semantics of volatile, this must be a single instruction store.
      locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
    } else {
      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
    }
  } else {
    if (is_volatile) {
      // In order to satisfy the semantics of volatile, this must be a single instruction store.
      locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
    } else {
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
    }
  }
  if (needs_write_barrier) {
    // Temporary registers for the write barrier.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    locations->AddTemp(Location::RequiresRegister());
  } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
    // Temporary register for the reference poisoning.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
                                                    const FieldInfo& field_info,
                                                    bool value_can_be_null) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());

  LocationSummary* locations = instruction->GetLocations();
  CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  bool is_volatile = field_info.IsVolatile();
  Primitive::Type field_type = field_info.GetFieldType();
  uint32_t offset = field_info.GetFieldOffset().Uint32Value();

  if (is_volatile) {
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
  }

  bool maybe_record_implicit_null_check_done = false;

  switch (field_type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte: {
      if (value.IsConstant()) {
        int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
        __ movb(Address(base, offset), Immediate(v));
      } else {
        __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
      }
      break;
    }

    case Primitive::kPrimShort:
    case Primitive::kPrimChar: {
      if (value.IsConstant()) {
        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
        __ movw(Address(base, offset), Immediate(v));
      } else {
        __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
      }
      break;
    }

    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      if (value.IsConstant()) {
        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
        // `field_type == Primitive::kPrimNot` implies `v == 0`.
        DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
        // Note: if heap poisoning is enabled, no need to poison
        // (negate) `v` if it is a reference, as it would be null.
        __ movl(Address(base, offset), Immediate(v));
      } else {
        if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
          CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
          __ movl(temp, value.AsRegister<CpuRegister>());
          __ PoisonHeapReference(temp);
          __ movl(Address(base, offset), temp);
        } else {
          __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
        }
      }
      break;
    }

    case Primitive::kPrimLong: {
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        codegen_->MoveInt64ToAddress(Address(base, offset),
                                     Address(base, offset + sizeof(int32_t)),
                                     v,
                                     instruction);
        maybe_record_implicit_null_check_done = true;
      } else {
        __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
      }
      break;
    }

    case Primitive::kPrimFloat: {
      if (value.IsConstant()) {
        int32_t v =
            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
        __ movl(Address(base, offset), Immediate(v));
      } else {
        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
      }
      break;
    }

    case Primitive::kPrimDouble: {
      if (value.IsConstant()) {
        int64_t v =
            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
        codegen_->MoveInt64ToAddress(Address(base, offset),
                                     Address(base, offset + sizeof(int32_t)),
                                     v,
                                     instruction);
        maybe_record_implicit_null_check_done = true;
      } else {
        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << field_type;
      UNREACHABLE();
  }

  if (!maybe_record_implicit_null_check_done) {
    codegen_->MaybeRecordImplicitNullCheck(instruction);
  }

  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
    codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
  }

  if (is_volatile) {
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
  }
}

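// Editorial note: the MarkGCCard call above is the generational write barrier:
// whenever a reference is stored into an object, the card covering that object
// is dirtied so the next GC pause knows to re-scan it for old-to-young
// pointers. In outline (the constants live in gc/accounting/card_table.h and
// the biased card-table base in the Thread):
//
//   if (value_can_be_null && value == null) skip;
//   card = gs:[card table offset]                // per-thread biased base
//   card[object_address >> kCardShift] = dirty;  // one byte per card
//
// Only reference stores need this, hence the StoreNeedsWriteBarrier guard and
// the two temporaries reserved by the locations builder above.
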
void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
    HUnresolvedInstanceFieldGet* instruction) {
  FieldAccessCallingConventionX86_64 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
    HUnresolvedInstanceFieldGet* instruction) {
  FieldAccessCallingConventionX86_64 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
    HUnresolvedInstanceFieldSet* instruction) {
  FieldAccessCallingConventionX86_64 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
    HUnresolvedInstanceFieldSet* instruction) {
  FieldAccessCallingConventionX86_64 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
    HUnresolvedStaticFieldGet* instruction) {
  FieldAccessCallingConventionX86_64 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
    HUnresolvedStaticFieldGet* instruction) {
  FieldAccessCallingConventionX86_64 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
    HUnresolvedStaticFieldSet* instruction) {
  FieldAccessCallingConventionX86_64 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
    HUnresolvedStaticFieldSet* instruction) {
  FieldAccessCallingConventionX86_64 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
  Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
      ? Location::RequiresRegister()
      : Location::Any();
  locations->SetInAt(0, loc);
}
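// A minimal sketch of the two null-check strategies implemented below,
// assuming the usual runtime fault-handler setup: the implicit variant emits
// a single probing load,
//
//   testl %eax, (obj)   // faults with SIGSEGV if obj is null
//
// and relies on the signal handler plus the PC info recorded here to turn
// the fault into a NullPointerException; the explicit variant compares
// against null and branches to NullCheckSlowPathX86_64 instead.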
void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
  if (CanMoveNullCheckToUser(instruction)) {
    return;
  }
  LocationSummary* locations = instruction->GetLocations();
  Location obj = locations->InAt(0);

  __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
  RecordPcInfo(instruction, instruction->GetDexPc());
}

void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
  SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
  AddSlowPath(slow_path);

  LocationSummary* locations = instruction->GetLocations();
  Location obj = locations->InAt(0);

  if (obj.IsRegister()) {
    __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
  } else if (obj.IsStackSlot()) {
    __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
  } else {
    DCHECK(obj.IsConstant()) << obj;
    DCHECK(obj.GetConstant()->IsNullConstant());
    __ jmp(slow_path->GetEntryLabel());
    return;
  }
  __ j(kEqual, slow_path->GetEntryLabel());
}

void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
  codegen_->GenerateNullCheck(instruction);
}

void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
  bool object_array_get_with_read_barrier =
      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction,
                                                   object_array_get_with_read_barrier ?
                                                       LocationSummary::kCallOnSlowPath :
                                                       LocationSummary::kNoCall);
  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
  if (Primitive::IsFloatingPointType(instruction->GetType())) {
    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
  } else {
    // The output overlaps for an object array get when read barriers
    // are enabled: we do not want the move to overwrite the array's
    // location, as we need it to emit the read barrier.
    locations->SetOut(
        Location::RequiresRegister(),
        object_array_get_with_read_barrier ? Location::kOutputOverlap
                                           : Location::kNoOutputOverlap);
  }
}
void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
  Location index = locations->InAt(1);
  Location out_loc = locations->Out();
  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);

  Primitive::Type type = instruction->GetType();
  switch (type) {
    case Primitive::kPrimBoolean: {
      CpuRegister out = out_loc.AsRegister<CpuRegister>();
      __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
      break;
    }

    case Primitive::kPrimByte: {
      CpuRegister out = out_loc.AsRegister<CpuRegister>();
      __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
      break;
    }

    case Primitive::kPrimShort: {
      CpuRegister out = out_loc.AsRegister<CpuRegister>();
      __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
      break;
    }

    case Primitive::kPrimChar: {
      CpuRegister out = out_loc.AsRegister<CpuRegister>();
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        // Branch into the compressed and uncompressed cases, whatever the type of the index.
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        NearLabel done, not_compressed;
        __ testb(Address(obj, count_offset), Immediate(1));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        __ j(kNotZero, &not_compressed);
        __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
        __ jmp(&done);
        __ Bind(&not_compressed);
        __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
        __ Bind(&done);
      } else {
        __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
      }
      break;
    }

    case Primitive::kPrimInt: {
      CpuRegister out = out_loc.AsRegister<CpuRegister>();
      __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
      break;
    }

    case Primitive::kPrimNot: {
      static_assert(
          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
      // /* HeapReference<Object> */ out =
      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // Note that a potential implicit null check is handled in this
        // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
        codegen_->GenerateArrayLoadWithBakerReadBarrier(
            instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
      } else {
        CpuRegister out = out_loc.AsRegister<CpuRegister>();
        __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        // If read barriers are enabled, emit read barriers other than
        // Baker's using a slow path (and also unpoison the loaded
        // reference, if heap poisoning is enabled).
        if (index.IsConstant()) {
          uint32_t offset =
              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
        } else {
          codegen_->MaybeGenerateReadBarrierSlow(
              instruction, out_loc, out_loc, obj_loc, data_offset, index);
        }
      }
      break;
    }

    case Primitive::kPrimLong: {
      CpuRegister out = out_loc.AsRegister<CpuRegister>();
      __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
      break;
    }

    case Primitive::kPrimFloat: {
      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
      __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
      break;
    }

    case Primitive::kPrimDouble: {
      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
      __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }

  if (type == Primitive::kPrimNot) {
    // Potential implicit null checks, in the case of reference
    // arrays, are handled in the previous switch statement.
  } else {
    codegen_->MaybeRecordImplicitNullCheck(instruction);
  }
}
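// For HArrayGet on a compressed String, the char case above emits roughly
// the following (sketch, assuming the StringCompressionFlag layout from the
// static_assert):
//
//   testb $1, [obj + count_offset]   // bit 0 clear => compressed (8-bit)
//   jnz   not_compressed
//   movzxb out, [obj + index*1 + data_offset]
//   jmp   done
// not_compressed:
//   movzxw out, [obj + index*2 + data_offset]
// done: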
void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
  Primitive::Type value_type = instruction->GetComponentType();

  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();

  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
      instruction,
      may_need_runtime_call_for_type_check ?
          LocationSummary::kCallOnSlowPath :
          LocationSummary::kNoCall);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
  if (Primitive::IsFloatingPointType(value_type)) {
    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
  } else {
    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
  }

  if (needs_write_barrier) {
    // Temporary registers for the write barrier.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
    locations->AddTemp(Location::RequiresRegister());
  }
}
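// Informal reminder: StoreNeedsWriteBarrier is true only for reference
// stores whose value is not known to be null. E.g. `array[i] = obj` needs
// the card mark (and hence the two temps reserved above), while
// `array[i] = null` and all primitive stores do not.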
void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location array_loc = locations->InAt(0);
  CpuRegister array = array_loc.AsRegister<CpuRegister>();
  Location index = locations->InAt(1);
  Location value = locations->InAt(2);
  Primitive::Type value_type = instruction->GetComponentType();
  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();

  switch (value_type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
      Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
      if (value.IsRegister()) {
        __ movb(address, value.AsRegister<CpuRegister>());
      } else {
        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
      }
      codegen_->MaybeRecordImplicitNullCheck(instruction);
      break;
    }

    case Primitive::kPrimShort:
    case Primitive::kPrimChar: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
      Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
      if (value.IsRegister()) {
        __ movw(address, value.AsRegister<CpuRegister>());
      } else {
        DCHECK(value.IsConstant()) << value;
        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
      }
      codegen_->MaybeRecordImplicitNullCheck(instruction);
      break;
    }

    case Primitive::kPrimNot: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
      Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);

      if (!value.IsRegister()) {
        // Just setting null.
        DCHECK(instruction->InputAt(2)->IsNullConstant());
        DCHECK(value.IsConstant()) << value;
        __ movl(address, Immediate(0));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        DCHECK(!needs_write_barrier);
        DCHECK(!may_need_runtime_call_for_type_check);
        break;
      }

      DCHECK(needs_write_barrier);
      CpuRegister register_value = value.AsRegister<CpuRegister>();
      // We cannot use a NearLabel for `done`, as its range may be too
      // short when Baker read barriers are enabled.
      Label done;
      NearLabel not_null, do_put;
      SlowPathCode* slow_path = nullptr;
      Location temp_loc = locations->GetTemp(0);
      CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
      if (may_need_runtime_call_for_type_check) {
        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
        codegen_->AddSlowPath(slow_path);
        if (instruction->GetValueCanBeNull()) {
          __ testl(register_value, register_value);
          __ j(kNotEqual, &not_null);
          __ movl(address, Immediate(0));
          codegen_->MaybeRecordImplicitNullCheck(instruction);
          __ jmp(&done);
          __ Bind(&not_null);
        }
        // Note that when Baker read barriers are enabled, the type
        // checks are performed without read barriers. This is fine,
        // even in the case where a class object is in the from-space
        // after the flip, as a comparison involving such a type would
        // not produce a false positive; it may of course produce a
        // false negative, in which case we would take the ArraySet
        // slow path.

        // /* HeapReference<Class> */ temp = array->klass_
        __ movl(temp, Address(array, class_offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        __ MaybeUnpoisonHeapReference(temp);

        // /* HeapReference<Class> */ temp = temp->component_type_
        __ movl(temp, Address(temp, component_offset));
        // If heap poisoning is enabled, no need to unpoison `temp`
        // nor the object reference in `register_value->klass`, as
        // we are comparing two poisoned references.
        __ cmpl(temp, Address(register_value, class_offset));

        if (instruction->StaticTypeOfArrayIsObjectArray()) {
          __ j(kEqual, &do_put);
          // If heap poisoning is enabled, the `temp` reference has
          // not been unpoisoned yet; unpoison it now.
          __ MaybeUnpoisonHeapReference(temp);

          // If heap poisoning is enabled, no need to unpoison the
          // heap reference loaded below, as it is only used for a
          // comparison with null.
          __ cmpl(Address(temp, super_offset), Immediate(0));
          __ j(kNotEqual, slow_path->GetEntryLabel());
          __ Bind(&do_put);
        } else {
          __ j(kNotEqual, slow_path->GetEntryLabel());
        }
      }

      if (kPoisonHeapReferences) {
        __ movl(temp, register_value);
        __ PoisonHeapReference(temp);
        __ movl(address, temp);
      } else {
        __ movl(address, register_value);
      }
      if (!may_need_runtime_call_for_type_check) {
        codegen_->MaybeRecordImplicitNullCheck(instruction);
      }

      CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
      codegen_->MarkGCCard(
          temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
      __ Bind(&done);

      if (slow_path != nullptr) {
        __ Bind(slow_path->GetExitLabel());
      }

      break;
    }

    case Primitive::kPrimInt: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
      Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
      if (value.IsRegister()) {
        __ movl(address, value.AsRegister<CpuRegister>());
      } else {
        DCHECK(value.IsConstant()) << value;
        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
        __ movl(address, Immediate(v));
      }
      codegen_->MaybeRecordImplicitNullCheck(instruction);
      break;
    }

    case Primitive::kPrimLong: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
      Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
      if (value.IsRegister()) {
        __ movq(address, value.AsRegister<CpuRegister>());
        codegen_->MaybeRecordImplicitNullCheck(instruction);
      } else {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        Address address_high =
            CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
      }
      break;
    }

    case Primitive::kPrimFloat: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
      Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
      if (value.IsFpuRegister()) {
        __ movss(address, value.AsFpuRegister<XmmRegister>());
      } else {
        DCHECK(value.IsConstant());
        int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
        __ movl(address, Immediate(v));
      }
      codegen_->MaybeRecordImplicitNullCheck(instruction);
      break;
    }

    case Primitive::kPrimDouble: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
      Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
      if (value.IsFpuRegister()) {
        __ movsd(address, value.AsFpuRegister<XmmRegister>());
        codegen_->MaybeRecordImplicitNullCheck(instruction);
      } else {
        int64_t v =
            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
        Address address_high =
            CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << instruction->GetType();
      UNREACHABLE();
  }
}
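// Informal outline of the reference ArraySet fast path emitted above for an
// array statically typed as Object[] (poisoning elided, sketch only):
//
//   test  value, value                      // optional null short-circuit
//   jz    store_null
//   mov   temp, [array + class_offset]      // temp = array->klass_
//   mov   temp, [temp + component_offset]   // temp = component type
//   cmp   temp, [value + class_offset]      // exact component match?
//   je    do_put
//   cmp   [temp + super_offset], 0          // component is j.l.Object?
//   jne   ArraySetSlowPath                  // full check in the runtime
// do_put:
//   mov   [array + index*4 + data_offset], value
//   <mark GC card>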
void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  if (!instruction->IsEmittedAtUseSite()) {
    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
  if (instruction->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = instruction->GetLocations();
  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  __ movl(out, Address(obj, offset));
  codegen_->MaybeRecordImplicitNullCheck(instruction);
  // Drop the compression flag (bit 0) in case the array is a String's char array.
  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
    __ shrl(out, Immediate(1));
  }
}
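// With string compression, String.count holds (length << 1) | flag, where
// flag bit 0 is 0 for a compressed (8-bit) string and 1 for an uncompressed
// one; e.g. an uncompressed string of length 5 stores (5 << 1) | 1 == 11,
// and the shrl above recovers 5. Regular arrays store their length
// directly, so they skip the shift.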
void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
  RegisterSet caller_saves = RegisterSet::Empty();
  InvokeRuntimeCallingConvention calling_convention;
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
  HInstruction* length = instruction->InputAt(1);
  if (!length->IsEmittedAtUseSite()) {
    locations->SetInAt(1, Location::RegisterOrConstant(length));
  }
}

void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location index_loc = locations->InAt(0);
  Location length_loc = locations->InAt(1);
  SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);

  if (length_loc.IsConstant()) {
    int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
    if (index_loc.IsConstant()) {
      // BCE will remove the bounds check if we are guaranteed to pass.
      int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
      if (index < 0 || index >= length) {
        codegen_->AddSlowPath(slow_path);
        __ jmp(slow_path->GetEntryLabel());
      } else {
        // Some optimization after BCE may have generated this, and we should not
        // generate a bounds check if it is a valid range.
      }
      return;
    }

    // We have to reverse the jump condition because the length is the constant.
    CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
    __ cmpl(index_reg, Immediate(length));
    codegen_->AddSlowPath(slow_path);
    __ j(kAboveEqual, slow_path->GetEntryLabel());
  } else {
    HInstruction* array_length = instruction->InputAt(1);
    if (array_length->IsEmittedAtUseSite()) {
      // Address the length field in the array.
      DCHECK(array_length->IsArrayLength());
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
        // the string compression flag) with the in-memory length and avoid the temporary.
        CpuRegister length_reg = CpuRegister(TMP);
        __ movl(length_reg, array_len);
        codegen_->MaybeRecordImplicitNullCheck(array_length);
        __ shrl(length_reg, Immediate(1));
        codegen_->GenerateIntCompare(length_reg, index_loc);
      } else {
        // Checking the bound for the general case: an array of char, or a
        // String's backing array when the compression feature is off.
        if (index_loc.IsConstant()) {
          int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
          __ cmpl(array_len, Immediate(value));
        } else {
          __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
        }
        codegen_->MaybeRecordImplicitNullCheck(array_length);
      }
    } else {
      codegen_->GenerateIntCompare(length_loc, index_loc);
    }
    codegen_->AddSlowPath(slow_path);
    __ j(kBelowEqual, slow_path->GetEntryLabel());
  }
}
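// MarkGCCard below is the classic card-table write barrier; the emitted
// sequence is roughly (sketch):
//
//   mov  card, gs:[card_table_offset]   // thread-local biased card table base
//   mov  temp, object
//   shr  temp, kCardShift               // card index covering `object`
//   mov  byte [card + temp], card_low   // dirty the card
//
// Note the trick in the final store: the value written is the low byte of
// the biased base register itself, which the runtime arranges to equal the
// "dirty" card value, saving an extra immediate or register.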
void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
                                     CpuRegister card,
                                     CpuRegister object,
                                     CpuRegister value,
                                     bool value_can_be_null) {
  NearLabel is_null;
  if (value_can_be_null) {
    __ testl(value, value);
    __ j(kEqual, &is_null);
  }
  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
                                        /* no_rip */ true));
  __ movq(temp, object);
  __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
  __ movb(Address(temp, card, TIMES_1, 0), card);
  if (value_can_be_null) {
    __ Bind(&is_null);
  }
}

void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unimplemented";
}

void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
}

void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
  // In the suspend check slow path, usually there are no caller-save registers at all.
  // If SIMD instructions are present, however, we force spilling all live SIMD
  // registers in full width (since the runtime only saves/restores the lower part).
  locations->SetCustomSlowPathCallerSaves(
      GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}

void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
  HBasicBlock* block = instruction->GetBlock();
  if (block->GetLoopInformation() != nullptr) {
    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
    // The back edge will generate the suspend check.
    return;
  }
  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
    // The goto will generate the suspend check.
    return;
  }
  GenerateSuspendCheck(instruction, nullptr);
}
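// The suspend check itself is a cheap thread-local poll (sketch):
//
//   cmpw gs:[thread_flags_offset], 0   // any flags (e.g. suspend request)?
//   jne  SuspendCheckSlowPath          // -> kQuickTestSuspend runtime call
//   return_label:
//
// On a back edge with a known successor the polarity is flipped below: jump
// to the successor when no flag is set and fall through to the slow path
// otherwise.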
void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                          HBasicBlock* successor) {
  SuspendCheckSlowPathX86_64* slow_path =
      down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
  if (slow_path == nullptr) {
    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
    instruction->SetSlowPath(slow_path);
    codegen_->AddSlowPath(slow_path);
    if (successor != nullptr) {
      DCHECK(successor->IsLoopHeader());
      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
    }
  } else {
    DCHECK_EQ(slow_path->GetSuccessor(), successor);
  }

  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
                                  /* no_rip */ true),
                Immediate(0));
  if (successor == nullptr) {
    __ j(kNotEqual, slow_path->GetEntryLabel());
    __ Bind(slow_path->GetReturnLabel());
  } else {
    __ j(kEqual, codegen_->GetLabelOf(successor));
    __ jmp(slow_path->GetEntryLabel());
  }
}

X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
  return codegen_->GetAssembler();
}

void ParallelMoveResolverX86_64::EmitMove(size_t index) {
  MoveOperands* move = moves_[index];
  Location source = move->GetSource();
  Location destination = move->GetDestination();

  if (source.IsRegister()) {
    if (destination.IsRegister()) {
      __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
    } else if (destination.IsStackSlot()) {
      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
              source.AsRegister<CpuRegister>());
    } else {
      DCHECK(destination.IsDoubleStackSlot());
      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
              source.AsRegister<CpuRegister>());
    }
  } else if (source.IsStackSlot()) {
    if (destination.IsRegister()) {
      __ movl(destination.AsRegister<CpuRegister>(),
              Address(CpuRegister(RSP), source.GetStackIndex()));
    } else if (destination.IsFpuRegister()) {
      __ movss(destination.AsFpuRegister<XmmRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
    } else {
      DCHECK(destination.IsStackSlot());
      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
    }
  } else if (source.IsDoubleStackSlot()) {
    if (destination.IsRegister()) {
      __ movq(destination.AsRegister<CpuRegister>(),
              Address(CpuRegister(RSP), source.GetStackIndex()));
    } else if (destination.IsFpuRegister()) {
      __ movsd(destination.AsFpuRegister<XmmRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
    } else {
      DCHECK(destination.IsDoubleStackSlot()) << destination;
      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
    }
  } else if (source.IsSIMDStackSlot()) {
    DCHECK(destination.IsFpuRegister());
    __ movups(destination.AsFpuRegister<XmmRegister>(),
              Address(CpuRegister(RSP), source.GetStackIndex()));
  } else if (source.IsConstant()) {
    HConstant* constant = source.GetConstant();
    if (constant->IsIntConstant() || constant->IsNullConstant()) {
      int32_t value = CodeGenerator::GetInt32ValueOf(constant);
      if (destination.IsRegister()) {
        if (value == 0) {
          __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
        } else {
          __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
        }
      } else {
        DCHECK(destination.IsStackSlot()) << destination;
        __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
      }
    } else if (constant->IsLongConstant()) {
      int64_t value = constant->AsLongConstant()->GetValue();
      if (destination.IsRegister()) {
        codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
      } else {
        DCHECK(destination.IsDoubleStackSlot()) << destination;
        codegen_->Store64BitValueToStack(destination, value);
      }
    } else if (constant->IsFloatConstant()) {
      float fp_value = constant->AsFloatConstant()->GetValue();
      if (destination.IsFpuRegister()) {
        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
        codegen_->Load32BitValue(dest, fp_value);
      } else {
        DCHECK(destination.IsStackSlot()) << destination;
        Immediate imm(bit_cast<int32_t, float>(fp_value));
        __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
      }
    } else {
      DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
      double fp_value = constant->AsDoubleConstant()->GetValue();
      int64_t value = bit_cast<int64_t, double>(fp_value);
      if (destination.IsFpuRegister()) {
        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
        codegen_->Load64BitValue(dest, fp_value);
      } else {
        DCHECK(destination.IsDoubleStackSlot()) << destination;
        codegen_->Store64BitValueToStack(destination, value);
      }
    }
  } else if (source.IsFpuRegister()) {
    if (destination.IsFpuRegister()) {
      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
    } else if (destination.IsStackSlot()) {
      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
               source.AsFpuRegister<XmmRegister>());
    } else if (destination.IsDoubleStackSlot()) {
      __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
               source.AsFpuRegister<XmmRegister>());
    } else {
      DCHECK(destination.IsSIMDStackSlot());
      __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.AsFpuRegister<XmmRegister>());
    }
  }
}

void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
  __ movl(Address(CpuRegister(RSP), mem), reg);
  __ movl(reg, CpuRegister(TMP));
}

void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
  ScratchRegisterScope ensure_scratch(
      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());

  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
  __ movl(CpuRegister(ensure_scratch.GetRegister()),
          Address(CpuRegister(RSP), mem2 + stack_offset));
  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
          CpuRegister(ensure_scratch.GetRegister()));
}

void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
  __ movq(CpuRegister(TMP), reg1);
  __ movq(reg1, reg2);
  __ movq(reg2, CpuRegister(TMP));
}

void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
  __ movq(Address(CpuRegister(RSP), mem), reg);
  __ movq(reg, CpuRegister(TMP));
}

void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
  ScratchRegisterScope ensure_scratch(
      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());

  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
  __ movq(CpuRegister(ensure_scratch.GetRegister()),
          Address(CpuRegister(RSP), mem2 + stack_offset));
  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
          CpuRegister(ensure_scratch.GetRegister()));
}

void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
  __ movss(Address(CpuRegister(RSP), mem), reg);
  __ movd(reg, CpuRegister(TMP));
}

void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
  __ movsd(Address(CpuRegister(RSP), mem), reg);
  __ movd(reg, CpuRegister(TMP));
}
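// EmitSwap below resolves a swap (a cycle in the parallel move graph). Every
// case funnels through the reserved TMP register or, for memory/memory
// swaps, through an extra scratch claimed via ScratchRegisterScope (which
// may spill a register, compensated by the +stack_offset adjustment above);
// presumably this is preferred over xchg with a memory operand, which
// carries an implicit lock prefix.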
void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
  MoveOperands* move = moves_[index];
  Location source = move->GetSource();
  Location destination = move->GetDestination();

  if (source.IsRegister() && destination.IsRegister()) {
    Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
  } else if (source.IsRegister() && destination.IsStackSlot()) {
    Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
  } else if (source.IsStackSlot() && destination.IsRegister()) {
    Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
    Exchange32(destination.GetStackIndex(), source.GetStackIndex());
  } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
    Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
  } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
    Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
    Exchange64(destination.GetStackIndex(), source.GetStackIndex());
  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
    __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
    __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
    __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
  } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
    Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
  } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
    Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
  } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
    Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
  } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
    Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
  } else {
    LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
  }
}


void ParallelMoveResolverX86_64::SpillScratch(int reg) {
  __ pushq(CpuRegister(reg));
}


void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
  __ popq(CpuRegister(reg));
}
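// Rough shape of the initialization check emitted below (sketch):
//
//   cmpl [class + status_offset], kStatusInitialized
//   jl   LoadClassSlowPath   // status < Initialized: run the clinit path
//   exit_label:
//
// Every status value below kStatusInitialized denotes a class that may
// still need initialization, hence the single signed less-than test.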
void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
    SlowPathCode* slow_path, CpuRegister class_reg) {
  __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
          Immediate(mirror::Class::kStatusInitialized));
  __ j(kLess, slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
  // No need for memory fence, thanks to the x86-64 memory model.
}

HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
    HLoadClass::LoadKind desired_class_load_kind) {
  switch (desired_class_load_kind) {
    case HLoadClass::LoadKind::kInvalid:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
    case HLoadClass::LoadKind::kReferrersClass:
      break;
    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
    case HLoadClass::LoadKind::kBssEntry:
      DCHECK(!Runtime::Current()->UseJitCompilation());
      break;
    case HLoadClass::LoadKind::kJitTableAddress:
      DCHECK(Runtime::Current()->UseJitCompilation());
      break;
    case HLoadClass::LoadKind::kBootImageAddress:
    case HLoadClass::LoadKind::kRuntimeCall:
      break;
  }
  return desired_class_load_kind;
}

void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
    // Custom calling convention: RAX serves as both input and output.
    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
        cls,
        Location::RegisterLocation(RAX),
        Location::RegisterLocation(RAX));
    return;
  }
  DCHECK(!cls->NeedsAccessCheck());

  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
      ? LocationSummary::kCallOnSlowPath
      : LocationSummary::kNoCall;
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
  if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }

  if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
    locations->SetInAt(0, Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister());
  if (load_kind == HLoadClass::LoadKind::kBssEntry) {
    if (!kUseReadBarrier || kUseBakerReadBarrier) {
      // Rely on the type resolution and/or initialization to save everything.
      // Custom calling convention: RAX serves as both input and output.
      RegisterSet caller_saves = RegisterSet::Empty();
      caller_saves.Add(Location::RegisterLocation(RAX));
      locations->SetCustomSlowPathCallerSaves(caller_saves);
    } else {
      // For non-Baker read barrier we have a temp-clobbering call.
    }
  }
}

Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
                                                 dex::TypeIndex dex_index,
                                                 Handle<mirror::Class> handle) {
  jit_class_roots_.Overwrite(
      TypeReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
  // Add a patch entry and return the label.
  jit_class_patches_.emplace_back(dex_file, dex_index.index_);
  PatchInfo<Label>* info = &jit_class_patches_.back();
  return &info->label;
}

// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
    codegen_->GenerateLoadClassRuntimeCall(cls);
    return;
  }
  DCHECK(!cls->NeedsAccessCheck());

  LocationSummary* locations = cls->GetLocations();
  Location out_loc = locations->Out();
  CpuRegister out = out_loc.AsRegister<CpuRegister>();

  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
      ? kWithoutReadBarrier
      : kCompilerReadBarrierOption;
  bool generate_null_check = false;
  switch (load_kind) {
    case HLoadClass::LoadKind::kReferrersClass: {
      DCHECK(!cls->CanCallRuntime());
      DCHECK(!cls->MustGenerateClinitCheck());
      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
      GenerateGcRootFieldLoad(
          cls,
          out_loc,
          Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
          /* fixup_label */ nullptr,
          read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
      codegen_->RecordBootTypePatch(cls);
      break;
    case HLoadClass::LoadKind::kBootImageAddress: {
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      uint32_t address = dchecked_integral_cast<uint32_t>(
          reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
      DCHECK_NE(address, 0u);
      __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
      break;
    }
    case HLoadClass::LoadKind::kBssEntry: {
      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                          /* no_rip */ false);
      Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
      // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
      generate_null_check = true;
      break;
    }
    case HLoadClass::LoadKind::kJitTableAddress: {
      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                          /* no_rip */ true);
      Label* fixup_label =
          codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
      // /* GcRoot<mirror::Class> */ out = *address
      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
      break;
    }
    default:
      LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
      UNREACHABLE();
  }

  if (generate_null_check || cls->MustGenerateClinitCheck()) {
    DCHECK(cls->CanCallRuntime());
    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
    codegen_->AddSlowPath(slow_path);
    if (generate_null_check) {
      __ testl(out, out);
      __ j(kEqual, slow_path->GetEntryLabel());
    }
    if (cls->MustGenerateClinitCheck()) {
      GenerateClassInitializationCheck(slow_path, out);
    } else {
      __ Bind(slow_path->GetExitLabel());
    }
  }
}

void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
  locations->SetInAt(0, Location::RequiresRegister());
  if (check->HasUses()) {
    locations->SetOut(Location::SameAsFirstInput());
  }
}

void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
  // We assume the class is not null.
  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
      check->GetLoadClass(), check, check->GetDexPc(), true);
  codegen_->AddSlowPath(slow_path);
  GenerateClassInitializationCheck(slow_path,
                                   check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
}

HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
    HLoadString::LoadKind desired_string_load_kind) {
  switch (desired_string_load_kind) {
    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
    case HLoadString::LoadKind::kBssEntry:
      DCHECK(!Runtime::Current()->UseJitCompilation());
      break;
    case HLoadString::LoadKind::kJitTableAddress:
      DCHECK(Runtime::Current()->UseJitCompilation());
      break;
    case HLoadString::LoadKind::kBootImageAddress:
    case HLoadString::LoadKind::kRuntimeCall:
      break;
  }
  return desired_string_load_kind;
}

void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
  LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
  if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
    locations->SetOut(Location::RegisterLocation(RAX));
  } else {
    locations->SetOut(Location::RequiresRegister());
    if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
      if (!kUseReadBarrier || kUseBakerReadBarrier) {
        // Rely on the pResolveString to save everything.
        // Custom calling convention: RAX serves as both input and output.
        RegisterSet caller_saves = RegisterSet::Empty();
        caller_saves.Add(Location::RegisterLocation(RAX));
        locations->SetCustomSlowPathCallerSaves(caller_saves);
      } else {
        // For non-Baker read barrier we have a temp-clobbering call.
      }
    }
  }
}

Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
                                                  dex::StringIndex dex_index,
                                                  Handle<mirror::String> handle) {
  jit_string_roots_.Overwrite(
      StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
  // Add a patch entry and return the label.
  jit_string_patches_.emplace_back(dex_file, dex_index.index_);
  PatchInfo<Label>* info = &jit_string_patches_.back();
  return &info->label;
}

// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
  LocationSummary* locations = load->GetLocations();
  Location out_loc = locations->Out();
  CpuRegister out = out_loc.AsRegister<CpuRegister>();

  switch (load->GetLoadKind()) {
    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
      codegen_->RecordBootStringPatch(load);
      return;  // No dex cache slow path.
    }
    case HLoadString::LoadKind::kBootImageAddress: {
      uint32_t address = dchecked_integral_cast<uint32_t>(
          reinterpret_cast<uintptr_t>(load->GetString().Get()));
      DCHECK_NE(address, 0u);
      __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
      return;  // No dex cache slow path.
    }
    case HLoadString::LoadKind::kBssEntry: {
      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                          /* no_rip */ false);
      Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
      // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
      codegen_->AddSlowPath(slow_path);
      __ testl(out, out);
      __ j(kEqual, slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
    case HLoadString::LoadKind::kJitTableAddress: {
      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                          /* no_rip */ true);
      Label* fixup_label = codegen_->NewJitRootStringPatch(
          load->GetDexFile(), load->GetStringIndex(), load->GetString());
      // /* GcRoot<mirror::String> */ out = *address
      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
      return;
    }
    default:
      break;
  }

  // TODO: Re-add the compiler code to do string dex cache lookup again.
  // Custom calling convention: RAX serves as both input and output.
  __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
  codegen_->InvokeRuntime(kQuickResolveString,
                          load,
                          load->GetDexPc());
  CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
}
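// On x86-64, ART addresses the current Thread through the GS segment, so
// thread-local runtime state is a gs-prefixed absolute address
// (Address::Absolute(..., /* no_rip */ true)). The exception load/clear
// below therefore compiles to a single instruction, roughly:
//
//   movl out, gs:[exception_offset]   // Thread::Current()->exception_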
static Address GetExceptionTlsAddress() {
  return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
                           /* no_rip */ true);
}

void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
  __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
}

void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
  new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
}

void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
  __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
}

void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
  codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}

static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
  if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
    // We need a temporary for holding the iftable length.
    return true;
  }
  return kEmitCompilerReadBarrier &&
         !kUseBakerReadBarrier &&
         (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
          type_check_kind == TypeCheckKind::kArrayObjectCheck);
}

static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
  return kEmitCompilerReadBarrier &&
         !kUseBakerReadBarrier &&
         (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
          type_check_kind == TypeCheckKind::kArrayObjectCheck);
}

void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  bool baker_read_barrier_slow_path = false;
  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kAbstractClassCheck:
    case TypeCheckKind::kClassHierarchyCheck:
    case TypeCheckKind::kArrayObjectCheck:
      call_kind =
          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
      baker_read_barrier_slow_path = kUseBakerReadBarrier;
      break;
    case TypeCheckKind::kArrayCheck:
    case TypeCheckKind::kUnresolvedCheck:
    case TypeCheckKind::kInterfaceCheck:
      call_kind = LocationSummary::kCallOnSlowPath;
      break;
  }

  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
  if (baker_read_barrier_slow_path) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::Any());
  // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
  locations->SetOut(Location::RequiresRegister());
  // When read barriers are enabled, we need a temporary register for
  // some cases.
  if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
    locations->AddTemp(Location::RequiresRegister());
  }
}

void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
  Location cls = locations->InAt(1);
  Location out_loc = locations->Out();
  CpuRegister out = out_loc.AsRegister<CpuRegister>();
  Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
      locations->GetTemp(0) :
      Location::NoLocation();
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  SlowPathCode* slow_path = nullptr;
  NearLabel done, zero;

  // Return 0 if `obj` is null.
  // Avoid the null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ testl(obj, obj);
    __ j(kEqual, &zero);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck: {
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kCompilerReadBarrierOption);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<CpuRegister>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
      }
      if (zero.IsLinked()) {
        // Classes must be equal for the instanceof to succeed.
        __ j(kNotEqual, &zero);
        __ movl(out, Immediate(1));
        __ jmp(&done);
      } else {
        __ setcc(kEqual, out);
        // setcc only sets the low byte.
        __ andl(out, Immediate(1));
      }
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kCompilerReadBarrierOption);
      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      NearLabel loop, success;
      __ Bind(&loop);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       kCompilerReadBarrierOption);
      __ testl(out, out);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ j(kEqual, &done);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<CpuRegister>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
      }
      __ j(kNotEqual, &loop);
      __ movl(out, Immediate(1));
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kCompilerReadBarrierOption);
      // Walk over the class hierarchy to find a match.
      NearLabel loop, success;
      __ Bind(&loop);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<CpuRegister>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
      }
      __ j(kEqual, &success);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       kCompilerReadBarrierOption);
      __ testl(out, out);
      __ j(kNotEqual, &loop);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ jmp(&done);
      __ Bind(&success);
      __ movl(out, Immediate(1));
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kCompilerReadBarrierOption);
      // Do an exact check.
      NearLabel exact_check;
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<CpuRegister>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
      }
      __ j(kEqual, &exact_check);
      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ out = out->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       component_offset,
                                       maybe_temp_loc,
                                       kCompilerReadBarrierOption);
      __ testl(out, out);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ j(kEqual, &done);
      __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, &zero);
      __ Bind(&exact_check);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }

    case TypeCheckKind::kArrayCheck: {
      // No read barrier since the slow path will retry upon failure.
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<CpuRegister>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
      }
      DCHECK(locations->OnlyCallsOnSlowPath());
      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
                                                                       /* is_fatal */ false);
      codegen_->AddSlowPath(slow_path);
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ movl(out, Immediate(1));
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
    case TypeCheckKind::kInterfaceCheck: {
      // Note that the call kind is kCallOnSlowPath, but for the unresolved
      // and interface check cases we always go into the slow path.
      //
      // We cannot directly call the InstanceofNonTrivial runtime
      // entry point without resorting to a type checking slow path
      // here (i.e. by calling InvokeRuntime directly), as it would
      // require us to assign fixed registers for the inputs of this
      // HInstanceOf instruction (following the runtime calling
      // convention), which might be cluttered by the potential first
      // read barrier emission at the beginning of this method.
      //
      // TODO: Introduce a new runtime entry point taking the object
      // to test (instead of its class) as argument, and let it deal
      // with the read barrier issues. This will let us refactor this
      // case of the `switch` code as it was previously (with a direct
      // call to the runtime not using a type checking slow path).
      // This should also be beneficial for the other cases above.
6016 DCHECK(locations->OnlyCallsOnSlowPath()); 6017 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, 6018 /* is_fatal */ false); 6019 codegen_->AddSlowPath(slow_path); 6020 __ jmp(slow_path->GetEntryLabel()); 6021 if (zero.IsLinked()) { 6022 __ jmp(&done); 6023 } 6024 break; 6025 } 6026 } 6027 6028 if (zero.IsLinked()) { 6029 __ Bind(&zero); 6030 __ xorl(out, out); 6031 } 6032 6033 if (done.IsLinked()) { 6034 __ Bind(&done); 6035 } 6036 6037 if (slow_path != nullptr) { 6038 __ Bind(slow_path->GetExitLabel()); 6039 } 6040 } 6041 6042 static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) { 6043 switch (type_check_kind) { 6044 case TypeCheckKind::kExactCheck: 6045 case TypeCheckKind::kAbstractClassCheck: 6046 case TypeCheckKind::kClassHierarchyCheck: 6047 case TypeCheckKind::kArrayObjectCheck: 6048 return !throws_into_catch && !kEmitCompilerReadBarrier; 6049 case TypeCheckKind::kInterfaceCheck: 6050 return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences; 6051 case TypeCheckKind::kArrayCheck: 6052 case TypeCheckKind::kUnresolvedCheck: 6053 return false; 6054 } 6055 LOG(FATAL) << "Unreachable"; 6056 UNREACHABLE(); 6057 } 6058 6059 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { 6060 bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); 6061 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6062 bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch); 6063 LocationSummary::CallKind call_kind = is_fatal_slow_path 6064 ? LocationSummary::kNoCall 6065 : LocationSummary::kCallOnSlowPath; 6066 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); 6067 locations->SetInAt(0, Location::RequiresRegister()); 6068 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 6069 // Require a register for the interface check since there is a loop that compares the class to 6070 // a memory address. 6071 locations->SetInAt(1, Location::RequiresRegister()); 6072 } else { 6073 locations->SetInAt(1, Location::Any()); 6074 } 6075 6076 // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. 6077 locations->AddTemp(Location::RequiresRegister()); 6078 // When read barriers are enabled, we need an additional temporary 6079 // register for some cases. 6080 if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { 6081 locations->AddTemp(Location::RequiresRegister()); 6082 } 6083 } 6084 6085 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { 6086 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6087 LocationSummary* locations = instruction->GetLocations(); 6088 Location obj_loc = locations->InAt(0); 6089 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 6090 Location cls = locations->InAt(1); 6091 Location temp_loc = locations->GetTemp(0); 6092 CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); 6093 Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? 
6094 locations->GetTemp(1) : 6095 Location::NoLocation(); 6096 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 6097 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 6098 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 6099 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 6100 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 6101 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 6102 const uint32_t object_array_data_offset = 6103 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 6104 6105 // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases 6106 // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding 6107 // read barriers is done for performance and code size reasons. 6108 bool is_type_check_slow_path_fatal = 6109 IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock()); 6110 SlowPathCode* type_check_slow_path = 6111 new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, 6112 is_type_check_slow_path_fatal); 6113 codegen_->AddSlowPath(type_check_slow_path); 6114 6115 6116 NearLabel done; 6117 // Avoid null check if we know obj is not null. 6118 if (instruction->MustDoNullCheck()) { 6119 __ testl(obj, obj); 6120 __ j(kEqual, &done); 6121 } 6122 6123 switch (type_check_kind) { 6124 case TypeCheckKind::kExactCheck: 6125 case TypeCheckKind::kArrayCheck: { 6126 // /* HeapReference<Class> */ temp = obj->klass_ 6127 GenerateReferenceLoadTwoRegisters(instruction, 6128 temp_loc, 6129 obj_loc, 6130 class_offset, 6131 kWithoutReadBarrier); 6132 if (cls.IsRegister()) { 6133 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6134 } else { 6135 DCHECK(cls.IsStackSlot()) << cls; 6136 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6137 } 6138 // Jump to slow path for throwing the exception or doing a 6139 // more involved array check. 6140 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6141 break; 6142 } 6143 6144 case TypeCheckKind::kAbstractClassCheck: { 6145 // /* HeapReference<Class> */ temp = obj->klass_ 6146 GenerateReferenceLoadTwoRegisters(instruction, 6147 temp_loc, 6148 obj_loc, 6149 class_offset, 6150 kWithoutReadBarrier); 6151 // If the class is abstract, we eagerly fetch the super class of the 6152 // object to avoid doing a comparison we know will fail. 6153 NearLabel loop; 6154 __ Bind(&loop); 6155 // /* HeapReference<Class> */ temp = temp->super_class_ 6156 GenerateReferenceLoadOneRegister(instruction, 6157 temp_loc, 6158 super_offset, 6159 maybe_temp2_loc, 6160 kWithoutReadBarrier); 6161 6162 // If the class reference currently in `temp` is null, jump to the slow path to throw the 6163 // exception. 6164 __ testl(temp, temp); 6165 // Otherwise, compare the classes. 6166 __ j(kZero, type_check_slow_path->GetEntryLabel()); 6167 if (cls.IsRegister()) { 6168 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6169 } else { 6170 DCHECK(cls.IsStackSlot()) << cls; 6171 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6172 } 6173 __ j(kNotEqual, &loop); 6174 break; 6175 } 6176 6177 case TypeCheckKind::kClassHierarchyCheck: { 6178 // /* HeapReference<Class> */ temp = obj->klass_ 6179 GenerateReferenceLoadTwoRegisters(instruction, 6180 temp_loc, 6181 obj_loc, 6182 class_offset, 6183 kWithoutReadBarrier); 6184 // Walk over the class hierarchy to find a match. 
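      // In pseudo-code:
      //   do {
      //     if (temp == cls) goto done;    // Match found, the cast succeeds.
      //     temp = temp->super_class_;
      //   } while (temp != null);
      //   goto slow_path;                  // Hierarchy exhausted: throw.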
      NearLabel loop;
      __ Bind(&loop);
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<CpuRegister>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
      }
      __ j(kEqual, &done);

      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is not null, jump
      // back to the beginning of the loop.
      __ testl(temp, temp);
      __ j(kNotZero, &loop);
      // Otherwise, jump to the slow path to throw the exception.
      __ jmp(type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);
      // Do an exact check.
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<CpuRegister>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
      }
      __ j(kEqual, &done);

      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ temp = temp->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       component_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the component type is null (i.e. the object is not an array),
      // jump to the slow path to throw the exception. Otherwise, fall
      // through to check that this component type is not a primitive type.
      __ testl(temp, temp);
      __ j(kZero, type_check_slow_path->GetEntryLabel());
      __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kUnresolvedCheck: {
      // We always go into the type check slow path for the unresolved case.
      //
      // We cannot directly call the CheckCast runtime entry point
      // without resorting to a type checking slow path here (i.e. by
      // calling InvokeRuntime directly), as it would require us to
      // assign fixed registers for the inputs of this HCheckCast
      // instruction (following the runtime calling convention), which
      // might be cluttered by the potential first read barrier
      // emission at the beginning of this method.
      __ jmp(type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kInterfaceCheck:
      // Fast path for the interface check. We always go slow path for heap poisoning since
      // unpoisoning cls would require an extra temp.
      if (!kPoisonHeapReferences) {
        // Try to avoid read barriers to improve the fast path. We cannot get false positives by
        // doing this.
        // /* HeapReference<Class> */ temp = obj->klass_
        GenerateReferenceLoadTwoRegisters(instruction,
                                          temp_loc,
                                          obj_loc,
                                          class_offset,
                                          kWithoutReadBarrier);

        // /* HeapReference<Class> */ temp = temp->iftable_
        GenerateReferenceLoadTwoRegisters(instruction,
                                          temp_loc,
                                          temp_loc,
                                          iftable_offset,
                                          kWithoutReadBarrier);
        // Iftable is never null.
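        // The iftable is a flattened array of (interface class, method array)
        // pairs, so the loop below walks it backwards two slots at a time and
        // compares the interface class slots against `cls`.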
6282 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); 6283 // Loop through the iftable and check if any class matches. 6284 NearLabel start_loop; 6285 __ Bind(&start_loop); 6286 // Need to subtract first to handle the empty array case. 6287 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); 6288 __ j(kNegative, type_check_slow_path->GetEntryLabel()); 6289 // Go to next interface if the classes do not match. 6290 __ cmpl(cls.AsRegister<CpuRegister>(), 6291 CodeGeneratorX86_64::ArrayAddress(temp, 6292 maybe_temp2_loc, 6293 TIMES_4, 6294 object_array_data_offset)); 6295 __ j(kNotEqual, &start_loop); // Return if same class. 6296 } else { 6297 __ jmp(type_check_slow_path->GetEntryLabel()); 6298 } 6299 break; 6300 } 6301 6302 if (done.IsLinked()) { 6303 __ Bind(&done); 6304 } 6305 6306 __ Bind(type_check_slow_path->GetExitLabel()); 6307 } 6308 6309 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { 6310 LocationSummary* locations = 6311 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); 6312 InvokeRuntimeCallingConvention calling_convention; 6313 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 6314 } 6315 6316 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { 6317 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject, 6318 instruction, 6319 instruction->GetDexPc()); 6320 if (instruction->IsEnter()) { 6321 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 6322 } else { 6323 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 6324 } 6325 } 6326 6327 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } 6328 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } 6329 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } 6330 6331 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { 6332 LocationSummary* locations = 6333 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); 6334 DCHECK(instruction->GetResultType() == Primitive::kPrimInt 6335 || instruction->GetResultType() == Primitive::kPrimLong); 6336 locations->SetInAt(0, Location::RequiresRegister()); 6337 locations->SetInAt(1, Location::Any()); 6338 locations->SetOut(Location::SameAsFirstInput()); 6339 } 6340 6341 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) { 6342 HandleBitwiseOperation(instruction); 6343 } 6344 6345 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) { 6346 HandleBitwiseOperation(instruction); 6347 } 6348 6349 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) { 6350 HandleBitwiseOperation(instruction); 6351 } 6352 6353 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { 6354 LocationSummary* locations = instruction->GetLocations(); 6355 Location first = locations->InAt(0); 6356 Location second = locations->InAt(1); 6357 DCHECK(first.Equals(locations->Out())); 6358 6359 if (instruction->GetResultType() == Primitive::kPrimInt) { 6360 if (second.IsRegister()) { 6361 if (instruction->IsAnd()) { 6362 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6363 } else if (instruction->IsOr()) { 6364 __ orl(first.AsRegister<CpuRegister>(), 
second.AsRegister<CpuRegister>()); 6365 } else { 6366 DCHECK(instruction->IsXor()); 6367 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6368 } 6369 } else if (second.IsConstant()) { 6370 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); 6371 if (instruction->IsAnd()) { 6372 __ andl(first.AsRegister<CpuRegister>(), imm); 6373 } else if (instruction->IsOr()) { 6374 __ orl(first.AsRegister<CpuRegister>(), imm); 6375 } else { 6376 DCHECK(instruction->IsXor()); 6377 __ xorl(first.AsRegister<CpuRegister>(), imm); 6378 } 6379 } else { 6380 Address address(CpuRegister(RSP), second.GetStackIndex()); 6381 if (instruction->IsAnd()) { 6382 __ andl(first.AsRegister<CpuRegister>(), address); 6383 } else if (instruction->IsOr()) { 6384 __ orl(first.AsRegister<CpuRegister>(), address); 6385 } else { 6386 DCHECK(instruction->IsXor()); 6387 __ xorl(first.AsRegister<CpuRegister>(), address); 6388 } 6389 } 6390 } else { 6391 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); 6392 CpuRegister first_reg = first.AsRegister<CpuRegister>(); 6393 bool second_is_constant = false; 6394 int64_t value = 0; 6395 if (second.IsConstant()) { 6396 second_is_constant = true; 6397 value = second.GetConstant()->AsLongConstant()->GetValue(); 6398 } 6399 bool is_int32_value = IsInt<32>(value); 6400 6401 if (instruction->IsAnd()) { 6402 if (second_is_constant) { 6403 if (is_int32_value) { 6404 __ andq(first_reg, Immediate(static_cast<int32_t>(value))); 6405 } else { 6406 __ andq(first_reg, codegen_->LiteralInt64Address(value)); 6407 } 6408 } else if (second.IsDoubleStackSlot()) { 6409 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6410 } else { 6411 __ andq(first_reg, second.AsRegister<CpuRegister>()); 6412 } 6413 } else if (instruction->IsOr()) { 6414 if (second_is_constant) { 6415 if (is_int32_value) { 6416 __ orq(first_reg, Immediate(static_cast<int32_t>(value))); 6417 } else { 6418 __ orq(first_reg, codegen_->LiteralInt64Address(value)); 6419 } 6420 } else if (second.IsDoubleStackSlot()) { 6421 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6422 } else { 6423 __ orq(first_reg, second.AsRegister<CpuRegister>()); 6424 } 6425 } else { 6426 DCHECK(instruction->IsXor()); 6427 if (second_is_constant) { 6428 if (is_int32_value) { 6429 __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); 6430 } else { 6431 __ xorq(first_reg, codegen_->LiteralInt64Address(value)); 6432 } 6433 } else if (second.IsDoubleStackSlot()) { 6434 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6435 } else { 6436 __ xorq(first_reg, second.AsRegister<CpuRegister>()); 6437 } 6438 } 6439 } 6440 } 6441 6442 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( 6443 HInstruction* instruction, 6444 Location out, 6445 uint32_t offset, 6446 Location maybe_temp, 6447 ReadBarrierOption read_barrier_option) { 6448 CpuRegister out_reg = out.AsRegister<CpuRegister>(); 6449 if (read_barrier_option == kWithReadBarrier) { 6450 CHECK(kEmitCompilerReadBarrier); 6451 if (kUseBakerReadBarrier) { 6452 // Load with fast path based Baker's read barrier. 6453 // /* HeapReference<Object> */ out = *(out + offset) 6454 codegen_->GenerateFieldLoadWithBakerReadBarrier( 6455 instruction, out, out_reg, offset, /* needs_null_check */ false); 6456 } else { 6457 // Load with slow path based read barrier. 
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
      __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ movl(out_reg, Address(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ movl(out_reg, Address(out_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    ReadBarrierOption read_barrier_option) {
  CpuRegister out_reg = out.AsRegister<CpuRegister>();
  CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, obj_reg, offset, /* needs_null_check */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ movl(out_reg, Address(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ movl(out_reg, Address(obj_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    const Address& address,
    Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  CpuRegister root_reg = root.AsRegister<CpuRegister>();
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used:
      //
      //   root = obj.field;
      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
      //   if (temp != null) {
      //     root = temp(root)
      //   }

      // /* GcRoot<mirror::Object> */ root = *address
      __ movl(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      static_assert(
          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
          "have different sizes.");
      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
                    "have different sizes.");

      // Slow path marking the GC root `root`.
      SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
          instruction, root, /* unpoison_ref_before_marking */ false);
      codegen_->AddSlowPath(slow_path);

      // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
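      // There is one marking entrypoint per core register; the thread-local
      // slot is selected by the root's register number (e.g. a root in RDI
      // uses the slot for register 7).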
6542 const int32_t entry_point_offset = 6543 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg()); 6544 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0)); 6545 // The entrypoint is null when the GC is not marking. 6546 __ j(kNotEqual, slow_path->GetEntryLabel()); 6547 __ Bind(slow_path->GetExitLabel()); 6548 } else { 6549 // GC root loaded through a slow path for read barriers other 6550 // than Baker's. 6551 // /* GcRoot<mirror::Object>* */ root = address 6552 __ leaq(root_reg, address); 6553 if (fixup_label != nullptr) { 6554 __ Bind(fixup_label); 6555 } 6556 // /* mirror::Object* */ root = root->Read() 6557 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); 6558 } 6559 } else { 6560 // Plain GC root load with no read barrier. 6561 // /* GcRoot<mirror::Object> */ root = *address 6562 __ movl(root_reg, address); 6563 if (fixup_label != nullptr) { 6564 __ Bind(fixup_label); 6565 } 6566 // Note that GC roots are not affected by heap poisoning, thus we 6567 // do not have to unpoison `root_reg` here. 6568 } 6569 } 6570 6571 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 6572 Location ref, 6573 CpuRegister obj, 6574 uint32_t offset, 6575 bool needs_null_check) { 6576 DCHECK(kEmitCompilerReadBarrier); 6577 DCHECK(kUseBakerReadBarrier); 6578 6579 // /* HeapReference<Object> */ ref = *(obj + offset) 6580 Address src(obj, offset); 6581 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 6582 } 6583 6584 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, 6585 Location ref, 6586 CpuRegister obj, 6587 uint32_t data_offset, 6588 Location index, 6589 bool needs_null_check) { 6590 DCHECK(kEmitCompilerReadBarrier); 6591 DCHECK(kUseBakerReadBarrier); 6592 6593 static_assert( 6594 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 6595 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 6596 // /* HeapReference<Object> */ ref = 6597 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 6598 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset); 6599 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 6600 } 6601 6602 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, 6603 Location ref, 6604 CpuRegister obj, 6605 const Address& src, 6606 bool needs_null_check, 6607 bool always_update_field, 6608 CpuRegister* temp1, 6609 CpuRegister* temp2) { 6610 DCHECK(kEmitCompilerReadBarrier); 6611 DCHECK(kUseBakerReadBarrier); 6612 6613 // In slow path based read barriers, the read barrier call is 6614 // inserted after the original load. However, in fast path based 6615 // Baker's read barriers, we need to perform the load of 6616 // mirror::Object::monitor_ *before* the original reference load. 6617 // This load-load ordering is required by the read barrier. 6618 // The fast path/slow path (for Baker's algorithm) should look like: 6619 // 6620 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); 6621 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 6622 // HeapReference<Object> ref = *src; // Original reference load. 6623 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 6624 // if (is_gray) { 6625 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. 
6626 // } 6627 // 6628 // Note: the original implementation in ReadBarrier::Barrier is 6629 // slightly more complex as: 6630 // - it implements the load-load fence using a data dependency on 6631 // the high-bits of rb_state, which are expected to be all zeroes 6632 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead 6633 // here, which is a no-op thanks to the x86-64 memory model); 6634 // - it performs additional checks that we do not do here for 6635 // performance reasons. 6636 6637 CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); 6638 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 6639 6640 // Given the numeric representation, it's enough to check the low bit of the rb_state. 6641 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); 6642 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 6643 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; 6644 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; 6645 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); 6646 6647 // if (rb_state == ReadBarrier::GrayState()) 6648 // ref = ReadBarrier::Mark(ref); 6649 // At this point, just do the "if" and make sure that flags are preserved until the branch. 6650 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); 6651 if (needs_null_check) { 6652 MaybeRecordImplicitNullCheck(instruction); 6653 } 6654 6655 // Load fence to prevent load-load reordering. 6656 // Note that this is a no-op, thanks to the x86-64 memory model. 6657 GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 6658 6659 // The actual reference load. 6660 // /* HeapReference<Object> */ ref = *src 6661 __ movl(ref_reg, src); // Flags are unaffected. 6662 6663 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. 6664 // Slow path marking the object `ref` when it is gray. 6665 SlowPathCode* slow_path; 6666 if (always_update_field) { 6667 DCHECK(temp1 != nullptr); 6668 DCHECK(temp2 != nullptr); 6669 slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( 6670 instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); 6671 } else { 6672 slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( 6673 instruction, ref, /* unpoison_ref_before_marking */ true); 6674 } 6675 AddSlowPath(slow_path); 6676 6677 // We have done the "if" of the gray bit check above, now branch based on the flags. 6678 __ j(kNotZero, slow_path->GetEntryLabel()); 6679 6680 // Object* ref = ref_addr->AsMirrorPtr() 6681 __ MaybeUnpoisonHeapReference(ref_reg); 6682 6683 __ Bind(slow_path->GetExitLabel()); 6684 } 6685 6686 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, 6687 Location out, 6688 Location ref, 6689 Location obj, 6690 uint32_t offset, 6691 Location index) { 6692 DCHECK(kEmitCompilerReadBarrier); 6693 6694 // Insert a slow path based read barrier *after* the reference load. 6695 // 6696 // If heap poisoning is enabled, the unpoisoning of the loaded 6697 // reference will be carried out by the runtime within the slow 6698 // path. 6699 // 6700 // Note that `ref` currently does not get unpoisoned (when heap 6701 // poisoning is enabled), which is alright as the `ref` argument is 6702 // not used by the artReadBarrierSlow entry point. 6703 // 6704 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 
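  // The barrier is therefore fully out of line: control always transfers to
  // the slow path below, which calls the runtime and resumes at the exit
  // label.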
6705 SlowPathCode* slow_path = new (GetGraph()->GetArena()) 6706 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); 6707 AddSlowPath(slow_path); 6708 6709 __ jmp(slow_path->GetEntryLabel()); 6710 __ Bind(slow_path->GetExitLabel()); 6711 } 6712 6713 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, 6714 Location out, 6715 Location ref, 6716 Location obj, 6717 uint32_t offset, 6718 Location index) { 6719 if (kEmitCompilerReadBarrier) { 6720 // Baker's read barriers shall be handled by the fast path 6721 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). 6722 DCHECK(!kUseBakerReadBarrier); 6723 // If heap poisoning is enabled, unpoisoning will be taken care of 6724 // by the runtime within the slow path. 6725 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); 6726 } else if (kPoisonHeapReferences) { 6727 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); 6728 } 6729 } 6730 6731 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, 6732 Location out, 6733 Location root) { 6734 DCHECK(kEmitCompilerReadBarrier); 6735 6736 // Insert a slow path based read barrier *after* the GC root load. 6737 // 6738 // Note that GC roots are not affected by heap poisoning, so we do 6739 // not need to do anything special for this here. 6740 SlowPathCode* slow_path = 6741 new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); 6742 AddSlowPath(slow_path); 6743 6744 __ jmp(slow_path->GetEntryLabel()); 6745 __ Bind(slow_path->GetExitLabel()); 6746 } 6747 6748 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 6749 // Nothing to do, this should be removed during prepare for register allocator. 6750 LOG(FATAL) << "Unreachable"; 6751 } 6752 6753 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 6754 // Nothing to do, this should be removed during prepare for register allocator. 6755 LOG(FATAL) << "Unreachable"; 6756 } 6757 6758 // Simple implementation of packed switch - generate cascaded compare/jumps. 6759 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 6760 LocationSummary* locations = 6761 new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); 6762 locations->SetInAt(0, Location::RequiresRegister()); 6763 locations->AddTemp(Location::RequiresRegister()); 6764 locations->AddTemp(Location::RequiresRegister()); 6765 } 6766 6767 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 6768 int32_t lower_bound = switch_instr->GetStartValue(); 6769 uint32_t num_entries = switch_instr->GetNumEntries(); 6770 LocationSummary* locations = switch_instr->GetLocations(); 6771 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); 6772 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); 6773 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); 6774 HBasicBlock* default_block = switch_instr->GetDefaultBlock(); 6775 6776 // Should we generate smaller inline compare/jumps? 6777 if (num_entries <= kPackedSwitchJumpTableThreshold) { 6778 // Figure out the correct compare values and jump conditions. 6779 // Handle the first compare/branch as a special case because it might 6780 // jump to the default case. 
6781 DCHECK_GT(num_entries, 2u); 6782 Condition first_condition; 6783 uint32_t index; 6784 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); 6785 if (lower_bound != 0) { 6786 first_condition = kLess; 6787 __ cmpl(value_reg_in, Immediate(lower_bound)); 6788 __ j(first_condition, codegen_->GetLabelOf(default_block)); 6789 __ j(kEqual, codegen_->GetLabelOf(successors[0])); 6790 6791 index = 1; 6792 } else { 6793 // Handle all the compare/jumps below. 6794 first_condition = kBelow; 6795 index = 0; 6796 } 6797 6798 // Handle the rest of the compare/jumps. 6799 for (; index + 1 < num_entries; index += 2) { 6800 int32_t compare_to_value = lower_bound + index + 1; 6801 __ cmpl(value_reg_in, Immediate(compare_to_value)); 6802 // Jump to successors[index] if value < case_value[index]. 6803 __ j(first_condition, codegen_->GetLabelOf(successors[index])); 6804 // Jump to successors[index + 1] if value == case_value[index + 1]. 6805 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); 6806 } 6807 6808 if (index != num_entries) { 6809 // There are an odd number of entries. Handle the last one. 6810 DCHECK_EQ(index + 1, num_entries); 6811 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index))); 6812 __ j(kEqual, codegen_->GetLabelOf(successors[index])); 6813 } 6814 6815 // And the default for any other value. 6816 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { 6817 __ jmp(codegen_->GetLabelOf(default_block)); 6818 } 6819 return; 6820 } 6821 6822 // Remove the bias, if needed. 6823 Register value_reg_out = value_reg_in.AsRegister(); 6824 if (lower_bound != 0) { 6825 __ leal(temp_reg, Address(value_reg_in, -lower_bound)); 6826 value_reg_out = temp_reg.AsRegister(); 6827 } 6828 CpuRegister value_reg(value_reg_out); 6829 6830 // Is the value in range? 6831 __ cmpl(value_reg, Immediate(num_entries - 1)); 6832 __ j(kAbove, codegen_->GetLabelOf(default_block)); 6833 6834 // We are in the range of the table. 6835 // Load the address of the jump table in the constant area. 6836 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr)); 6837 6838 // Load the (signed) offset from the jump table. 6839 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0)); 6840 6841 // Add the offset to the address of the table base. 6842 __ addq(temp_reg, base_reg); 6843 6844 // And jump. 6845 __ jmp(temp_reg); 6846 } 6847 6848 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) { 6849 if (value == 0) { 6850 __ xorl(dest, dest); 6851 } else { 6852 __ movl(dest, Immediate(value)); 6853 } 6854 } 6855 6856 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { 6857 if (value == 0) { 6858 // Clears upper bits too. 6859 __ xorl(dest, dest); 6860 } else if (IsUint<32>(value)) { 6861 // We can use a 32 bit move, as it will zero-extend and is shorter. 
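    // For example, value == 0x00000000ffffffff is emitted as `movl dest, -1`,
    // which the CPU zero-extends to the full 64-bit register.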
6862 __ movl(dest, Immediate(static_cast<int32_t>(value))); 6863 } else { 6864 __ movq(dest, Immediate(value)); 6865 } 6866 } 6867 6868 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) { 6869 if (value == 0) { 6870 __ xorps(dest, dest); 6871 } else { 6872 __ movss(dest, LiteralInt32Address(value)); 6873 } 6874 } 6875 6876 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) { 6877 if (value == 0) { 6878 __ xorpd(dest, dest); 6879 } else { 6880 __ movsd(dest, LiteralInt64Address(value)); 6881 } 6882 } 6883 6884 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) { 6885 Load32BitValue(dest, bit_cast<int32_t, float>(value)); 6886 } 6887 6888 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) { 6889 Load64BitValue(dest, bit_cast<int64_t, double>(value)); 6890 } 6891 6892 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) { 6893 if (value == 0) { 6894 __ testl(dest, dest); 6895 } else { 6896 __ cmpl(dest, Immediate(value)); 6897 } 6898 } 6899 6900 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) { 6901 if (IsInt<32>(value)) { 6902 if (value == 0) { 6903 __ testq(dest, dest); 6904 } else { 6905 __ cmpq(dest, Immediate(static_cast<int32_t>(value))); 6906 } 6907 } else { 6908 // Value won't fit in an int. 6909 __ cmpq(dest, LiteralInt64Address(value)); 6910 } 6911 } 6912 6913 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) { 6914 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); 6915 GenerateIntCompare(lhs_reg, rhs); 6916 } 6917 6918 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) { 6919 if (rhs.IsConstant()) { 6920 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); 6921 Compare32BitValue(lhs, value); 6922 } else if (rhs.IsStackSlot()) { 6923 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex())); 6924 } else { 6925 __ cmpl(lhs, rhs.AsRegister<CpuRegister>()); 6926 } 6927 } 6928 6929 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) { 6930 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); 6931 if (rhs.IsConstant()) { 6932 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue(); 6933 Compare64BitValue(lhs_reg, value); 6934 } else if (rhs.IsDoubleStackSlot()) { 6935 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 6936 } else { 6937 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>()); 6938 } 6939 } 6940 6941 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj, 6942 Location index, 6943 ScaleFactor scale, 6944 uint32_t data_offset) { 6945 return index.IsConstant() ? 6946 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : 6947 Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); 6948 } 6949 6950 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { 6951 DCHECK(dest.IsDoubleStackSlot()); 6952 if (IsInt<32>(value)) { 6953 // Can move directly as an int32 constant. 6954 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), 6955 Immediate(static_cast<int32_t>(value))); 6956 } else { 6957 Load64BitValue(CpuRegister(TMP), value); 6958 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP)); 6959 } 6960 } 6961 6962 /** 6963 * Class to handle late fixup of offsets into constant area. 
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
      : codegen_(&codegen), offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86_64* codegen_;

 private:
  void Process(const MemoryRegion& region, int pos) OVERRIDE {
    // Patch the correct offset for the instruction. We use the address of the
    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position = constant_offset - pos;

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  size_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86_64Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // Compute the offset from the start of the function to this jump table.
    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;

    // Populate the jump table with the correct offsets for each case.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - current_table_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HPackedSwitch* switch_instr_;
};

void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86_64Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
  return Address::RIP(fixup);
}

// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, Primitive::kPrimVoid);
    return;
  }

  DCHECK_NE(type, Primitive::kPrimVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
  if (trg.Equals(return_loc)) {
    return;
  }

  // Let the parallel move resolver take care of all of this.
  HParallelMove parallel_move(GetGraph()->GetArena());
  parallel_move.AddMove(return_loc, trg, type, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}

Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);

  // We have to populate the jump tables.
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // The value does not fit in a sign-extended 32-bit immediate. Store it in pieces.
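    // Only the first 32-bit store can fault on a null object, so it is the
    // one recorded for the implicit null check below.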
7108 int32_t low_v = Low32Bits(v); 7109 int32_t high_v = High32Bits(v); 7110 __ movl(addr_low, Immediate(low_v)); 7111 MaybeRecordImplicitNullCheck(instruction); 7112 __ movl(addr_high, Immediate(high_v)); 7113 } 7114 } 7115 7116 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, 7117 const uint8_t* roots_data, 7118 const PatchInfo<Label>& info, 7119 uint64_t index_in_table) const { 7120 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; 7121 uintptr_t address = 7122 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); 7123 typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; 7124 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = 7125 dchecked_integral_cast<uint32_t>(address); 7126 } 7127 7128 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { 7129 for (const PatchInfo<Label>& info : jit_string_patches_) { 7130 const auto it = jit_string_roots_.find( 7131 StringReference(&info.dex_file, dex::StringIndex(info.index))); 7132 DCHECK(it != jit_string_roots_.end()); 7133 uint64_t index_in_table = it->second; 7134 PatchJitRootUse(code, roots_data, info, index_in_table); 7135 } 7136 7137 for (const PatchInfo<Label>& info : jit_class_patches_) { 7138 const auto it = jit_class_roots_.find( 7139 TypeReference(&info.dex_file, dex::TypeIndex(info.index))); 7140 DCHECK(it != jit_class_roots_.end()); 7141 uint64_t index_in_table = it->second; 7142 PatchJitRootUse(code, roots_data, info, index_in_table); 7143 } 7144 } 7145 7146 #undef __ 7147 7148 } // namespace x86_64 7149 } // namespace art 7150