1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "code_generator_x86_64.h" 18 19 #include "art_method.h" 20 #include "class_table.h" 21 #include "code_generator_utils.h" 22 #include "compiled_method.h" 23 #include "entrypoints/quick/quick_entrypoints.h" 24 #include "gc/accounting/card_table.h" 25 #include "heap_poisoning.h" 26 #include "intrinsics.h" 27 #include "intrinsics_x86_64.h" 28 #include "linker/linker_patch.h" 29 #include "lock_word.h" 30 #include "mirror/array-inl.h" 31 #include "mirror/class-inl.h" 32 #include "mirror/object_reference.h" 33 #include "thread.h" 34 #include "utils/assembler.h" 35 #include "utils/stack_checks.h" 36 #include "utils/x86_64/assembler_x86_64.h" 37 #include "utils/x86_64/managed_register_x86_64.h" 38 39 namespace art { 40 41 template<class MirrorType> 42 class GcRoot; 43 44 namespace x86_64 { 45 46 static constexpr int kCurrentMethodStackOffset = 0; 47 static constexpr Register kMethodRegisterArgument = RDI; 48 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump 49 // table version generates 7 instructions and num_entries literals. The compare/jump sequence 50 // generates less code/data with a small num_entries. 51 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; 52 53 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 }; 54 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 }; 55 56 static constexpr int kC2ConditionMask = 0x400; 57 58 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 59 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT 60 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value() 61 62 class NullCheckSlowPathX86_64 : public SlowPathCode { 63 public: 64 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {} 65 66 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 67 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 68 __ Bind(GetEntryLabel()); 69 if (instruction_->CanThrowIntoCatchBlock()) { 70 // Live registers will be restored in the catch block if caught.
71 SaveLiveRegisters(codegen, instruction_->GetLocations()); 72 } 73 x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer, 74 instruction_, 75 instruction_->GetDexPc(), 76 this); 77 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); 78 } 79 80 bool IsFatal() const OVERRIDE { return true; } 81 82 const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; } 83 84 private: 85 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64); 86 }; 87 88 class DivZeroCheckSlowPathX86_64 : public SlowPathCode { 89 public: 90 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} 91 92 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 93 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 94 __ Bind(GetEntryLabel()); 95 x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); 96 CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); 97 } 98 99 bool IsFatal() const OVERRIDE { return true; } 100 101 const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; } 102 103 private: 104 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64); 105 }; 106 107 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { 108 public: 109 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div) 110 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} 111 112 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 113 __ Bind(GetEntryLabel()); 114 if (type_ == DataType::Type::kInt32) { 115 if (is_div_) { 116 __ negl(cpu_reg_); 117 } else { 118 __ xorl(cpu_reg_, cpu_reg_); 119 } 120 121 } else { 122 DCHECK_EQ(DataType::Type::kInt64, type_); 123 if (is_div_) { 124 __ negq(cpu_reg_); 125 } else { 126 __ xorl(cpu_reg_, cpu_reg_); 127 } 128 } 129 __ jmp(GetExitLabel()); 130 } 131 132 const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; } 133 134 private: 135 const CpuRegister cpu_reg_; 136 const DataType::Type type_; 137 const bool is_div_; 138 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64); 139 }; 140 141 class SuspendCheckSlowPathX86_64 : public SlowPathCode { 142 public: 143 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) 144 : SlowPathCode(instruction), successor_(successor) {} 145 146 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 147 LocationSummary* locations = instruction_->GetLocations(); 148 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 149 __ Bind(GetEntryLabel()); 150 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD. 151 x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); 152 CheckEntrypointTypes<kQuickTestSuspend, void, void>(); 153 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD. 
154 if (successor_ == nullptr) { 155 __ jmp(GetReturnLabel()); 156 } else { 157 __ jmp(x86_64_codegen->GetLabelOf(successor_)); 158 } 159 } 160 161 Label* GetReturnLabel() { 162 DCHECK(successor_ == nullptr); 163 return &return_label_; 164 } 165 166 HBasicBlock* GetSuccessor() const { 167 return successor_; 168 } 169 170 const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; } 171 172 private: 173 HBasicBlock* const successor_; 174 Label return_label_; 175 176 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64); 177 }; 178 179 class BoundsCheckSlowPathX86_64 : public SlowPathCode { 180 public: 181 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction) 182 : SlowPathCode(instruction) {} 183 184 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 185 LocationSummary* locations = instruction_->GetLocations(); 186 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 187 __ Bind(GetEntryLabel()); 188 if (instruction_->CanThrowIntoCatchBlock()) { 189 // Live registers will be restored in the catch block if caught. 190 SaveLiveRegisters(codegen, instruction_->GetLocations()); 191 } 192 // Are we using an array length from memory? 193 HInstruction* array_length = instruction_->InputAt(1); 194 Location length_loc = locations->InAt(1); 195 InvokeRuntimeCallingConvention calling_convention; 196 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) { 197 // Load the array length into our temporary. 198 HArrayLength* length = array_length->AsArrayLength(); 199 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length); 200 Location array_loc = array_length->GetLocations()->InAt(0); 201 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); 202 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1)); 203 // Check for conflicts with index. 204 if (length_loc.Equals(locations->InAt(0))) { 205 // We know we aren't using parameter 2. 206 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); 207 } 208 __ movl(length_loc.AsRegister<CpuRegister>(), array_len); 209 if (mirror::kUseStringCompression && length->IsStringLength()) { 210 __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1)); 211 } 212 } 213 214 // We're moving two locations to locations that could overlap, so we need a parallel 215 // move resolver. 216 codegen->EmitParallelMoves( 217 locations->InAt(0), 218 Location::RegisterLocation(calling_convention.GetRegisterAt(0)), 219 DataType::Type::kInt32, 220 length_loc, 221 Location::RegisterLocation(calling_convention.GetRegisterAt(1)), 222 DataType::Type::kInt32); 223 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() 224 ? 
kQuickThrowStringBounds 225 : kQuickThrowArrayBounds; 226 x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); 227 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); 228 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); 229 } 230 231 bool IsFatal() const OVERRIDE { return true; } 232 233 const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; } 234 235 private: 236 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64); 237 }; 238 239 class LoadClassSlowPathX86_64 : public SlowPathCode { 240 public: 241 LoadClassSlowPathX86_64(HLoadClass* cls, 242 HInstruction* at, 243 uint32_t dex_pc, 244 bool do_clinit) 245 : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { 246 DCHECK(at->IsLoadClass() || at->IsClinitCheck()); 247 } 248 249 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 250 LocationSummary* locations = instruction_->GetLocations(); 251 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 252 __ Bind(GetEntryLabel()); 253 254 SaveLiveRegisters(codegen, locations); 255 256 // Custom calling convention: RAX serves as both input and output. 257 __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_)); 258 x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType, 259 instruction_, 260 dex_pc_, 261 this); 262 if (do_clinit_) { 263 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); 264 } else { 265 CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); 266 } 267 268 Location out = locations->Out(); 269 // Move the class to the desired location. 270 if (out.IsValid()) { 271 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); 272 x86_64_codegen->Move(out, Location::RegisterLocation(RAX)); 273 } 274 275 RestoreLiveRegisters(codegen, locations); 276 __ jmp(GetExitLabel()); 277 } 278 279 const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; } 280 281 private: 282 // The class this slow path will load. 283 HLoadClass* const cls_; 284 285 // The dex PC of `at_`. 286 const uint32_t dex_pc_; 287 288 // Whether to initialize the class. 289 const bool do_clinit_; 290 291 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64); 292 }; 293 294 class LoadStringSlowPathX86_64 : public SlowPathCode { 295 public: 296 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {} 297 298 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 299 LocationSummary* locations = instruction_->GetLocations(); 300 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); 301 302 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 303 __ Bind(GetEntryLabel()); 304 SaveLiveRegisters(codegen, locations); 305 306 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); 307 // Custom calling convention: RAX serves as both input and output. 
308 __ movl(CpuRegister(RAX), Immediate(string_index.index_)); 309 x86_64_codegen->InvokeRuntime(kQuickResolveString, 310 instruction_, 311 instruction_->GetDexPc(), 312 this); 313 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 314 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); 315 RestoreLiveRegisters(codegen, locations); 316 317 __ jmp(GetExitLabel()); 318 } 319 320 const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; } 321 322 private: 323 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64); 324 }; 325 326 class TypeCheckSlowPathX86_64 : public SlowPathCode { 327 public: 328 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal) 329 : SlowPathCode(instruction), is_fatal_(is_fatal) {} 330 331 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 332 LocationSummary* locations = instruction_->GetLocations(); 333 uint32_t dex_pc = instruction_->GetDexPc(); 334 DCHECK(instruction_->IsCheckCast() 335 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); 336 337 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 338 __ Bind(GetEntryLabel()); 339 340 if (kPoisonHeapReferences && 341 instruction_->IsCheckCast() && 342 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) { 343 // First, unpoison the `cls` reference that was poisoned for direct memory comparison. 344 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>()); 345 } 346 347 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { 348 SaveLiveRegisters(codegen, locations); 349 } 350 351 // We're moving two locations to locations that could overlap, so we need a parallel 352 // move resolver. 353 InvokeRuntimeCallingConvention calling_convention; 354 codegen->EmitParallelMoves(locations->InAt(0), 355 Location::RegisterLocation(calling_convention.GetRegisterAt(0)), 356 DataType::Type::kReference, 357 locations->InAt(1), 358 Location::RegisterLocation(calling_convention.GetRegisterAt(1)), 359 DataType::Type::kReference); 360 if (instruction_->IsInstanceOf()) { 361 x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); 362 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); 363 } else { 364 DCHECK(instruction_->IsCheckCast()); 365 x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); 366 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); 367 } 368 369 if (!is_fatal_) { 370 if (instruction_->IsInstanceOf()) { 371 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); 372 } 373 374 RestoreLiveRegisters(codegen, locations); 375 __ jmp(GetExitLabel()); 376 } 377 } 378 379 const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; } 380 381 bool IsFatal() const OVERRIDE { return is_fatal_; } 382 383 private: 384 const bool is_fatal_; 385 386 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64); 387 }; 388 389 class DeoptimizationSlowPathX86_64 : public SlowPathCode { 390 public: 391 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) 392 : SlowPathCode(instruction) {} 393 394 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 395 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 396 __ Bind(GetEntryLabel()); 397 LocationSummary* locations = instruction_->GetLocations(); 398 SaveLiveRegisters(codegen, locations); 399 InvokeRuntimeCallingConvention 
calling_convention; 400 x86_64_codegen->Load32BitValue( 401 CpuRegister(calling_convention.GetRegisterAt(0)), 402 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); 403 x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); 404 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); 405 } 406 407 const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } 408 409 private: 410 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); 411 }; 412 413 class ArraySetSlowPathX86_64 : public SlowPathCode { 414 public: 415 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {} 416 417 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 418 LocationSummary* locations = instruction_->GetLocations(); 419 __ Bind(GetEntryLabel()); 420 SaveLiveRegisters(codegen, locations); 421 422 InvokeRuntimeCallingConvention calling_convention; 423 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); 424 parallel_move.AddMove( 425 locations->InAt(0), 426 Location::RegisterLocation(calling_convention.GetRegisterAt(0)), 427 DataType::Type::kReference, 428 nullptr); 429 parallel_move.AddMove( 430 locations->InAt(1), 431 Location::RegisterLocation(calling_convention.GetRegisterAt(1)), 432 DataType::Type::kInt32, 433 nullptr); 434 parallel_move.AddMove( 435 locations->InAt(2), 436 Location::RegisterLocation(calling_convention.GetRegisterAt(2)), 437 DataType::Type::kReference, 438 nullptr); 439 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); 440 441 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 442 x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); 443 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); 444 RestoreLiveRegisters(codegen, locations); 445 __ jmp(GetExitLabel()); 446 } 447 448 const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; } 449 450 private: 451 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); 452 }; 453 454 // Slow path marking an object reference `ref` during a read 455 // barrier. The field `obj.field` in the object `obj` holding this 456 // reference does not get updated by this slow path after marking (see 457 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that). 458 // 459 // This means that after the execution of this slow path, `ref` will 460 // always be up-to-date, but `obj.field` may not; i.e., after the 461 // flip, `ref` will be a to-space reference, but `obj.field` will 462 // probably still be a from-space reference (unless it gets updated by 463 // another thread, or if another thread installed another object 464 // reference (different from `ref`) in `obj.field`).
465 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { 466 public: 467 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, 468 Location ref, 469 bool unpoison_ref_before_marking) 470 : SlowPathCode(instruction), 471 ref_(ref), 472 unpoison_ref_before_marking_(unpoison_ref_before_marking) { 473 DCHECK(kEmitCompilerReadBarrier); 474 } 475 476 const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; } 477 478 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 479 LocationSummary* locations = instruction_->GetLocations(); 480 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); 481 Register ref_reg = ref_cpu_reg.AsRegister(); 482 DCHECK(locations->CanCall()); 483 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; 484 DCHECK(instruction_->IsInstanceFieldGet() || 485 instruction_->IsStaticFieldGet() || 486 instruction_->IsArrayGet() || 487 instruction_->IsArraySet() || 488 instruction_->IsLoadClass() || 489 instruction_->IsLoadString() || 490 instruction_->IsInstanceOf() || 491 instruction_->IsCheckCast() || 492 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || 493 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) 494 << "Unexpected instruction in read barrier marking slow path: " 495 << instruction_->DebugName(); 496 497 __ Bind(GetEntryLabel()); 498 if (unpoison_ref_before_marking_) { 499 // Object* ref = ref_addr->AsMirrorPtr() 500 __ MaybeUnpoisonHeapReference(ref_cpu_reg); 501 } 502 // No need to save live registers; it's taken care of by the 503 // entrypoint. Also, there is no need to update the stack mask, 504 // as this runtime call will not trigger a garbage collection. 505 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 506 DCHECK_NE(ref_reg, RSP); 507 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; 508 // "Compact" slow path, saving two moves. 509 // 510 // Instead of using the standard runtime calling convention (input 511 // and output in R0): 512 // 513 // RDI <- ref 514 // RAX <- ReadBarrierMark(RDI) 515 // ref <- RAX 516 // 517 // we just use rX (the register containing `ref`) as input and output 518 // of a dedicated entrypoint: 519 // 520 // rX <- ReadBarrierMarkRegX(rX) 521 // 522 int32_t entry_point_offset = 523 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); 524 // This runtime call does not require a stack map. 525 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); 526 __ jmp(GetExitLabel()); 527 } 528 529 private: 530 // The location (register) of the marked object reference. 531 const Location ref_; 532 // Should the reference in `ref_` be unpoisoned prior to marking it? 533 const bool unpoison_ref_before_marking_; 534 535 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); 536 }; 537 538 // Slow path marking an object reference `ref` during a read barrier, 539 // and if needed, atomically updating the field `obj.field` in the 540 // object `obj` holding this reference after marking (contrary to 541 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update 542 // `obj.field`). 543 // 544 // This means that after the execution of this slow path, both `ref` 545 // and `obj.field` will be up-to-date; i.e., after the flip, both will 546 // hold the same to-space reference (unless another thread installed 547 // another object reference (different from `ref`) in `obj.field`). 
548 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { 549 public: 550 ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction, 551 Location ref, 552 CpuRegister obj, 553 const Address& field_addr, 554 bool unpoison_ref_before_marking, 555 CpuRegister temp1, 556 CpuRegister temp2) 557 : SlowPathCode(instruction), 558 ref_(ref), 559 obj_(obj), 560 field_addr_(field_addr), 561 unpoison_ref_before_marking_(unpoison_ref_before_marking), 562 temp1_(temp1), 563 temp2_(temp2) { 564 DCHECK(kEmitCompilerReadBarrier); 565 } 566 567 const char* GetDescription() const OVERRIDE { 568 return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64"; 569 } 570 571 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 572 LocationSummary* locations = instruction_->GetLocations(); 573 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); 574 Register ref_reg = ref_cpu_reg.AsRegister(); 575 DCHECK(locations->CanCall()); 576 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; 577 // This slow path is only used by the UnsafeCASObject intrinsic. 578 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) 579 << "Unexpected instruction in read barrier marking and field updating slow path: " 580 << instruction_->DebugName(); 581 DCHECK(instruction_->GetLocations()->Intrinsified()); 582 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); 583 584 __ Bind(GetEntryLabel()); 585 if (unpoison_ref_before_marking_) { 586 // Object* ref = ref_addr->AsMirrorPtr() 587 __ MaybeUnpoisonHeapReference(ref_cpu_reg); 588 } 589 590 // Save the old (unpoisoned) reference. 591 __ movl(temp1_, ref_cpu_reg); 592 593 // No need to save live registers; it's taken care of by the 594 // entrypoint. Also, there is no need to update the stack mask, 595 // as this runtime call will not trigger a garbage collection. 596 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 597 DCHECK_NE(ref_reg, RSP); 598 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; 599 // "Compact" slow path, saving two moves. 600 // 601 // Instead of using the standard runtime calling convention (input 602 // and output in R0): 603 // 604 // RDI <- ref 605 // RAX <- ReadBarrierMark(RDI) 606 // ref <- RAX 607 // 608 // we just use rX (the register containing `ref`) as input and output 609 // of a dedicated entrypoint: 610 // 611 // rX <- ReadBarrierMarkRegX(rX) 612 // 613 int32_t entry_point_offset = 614 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); 615 // This runtime call does not require a stack map. 616 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); 617 618 // If the new reference is different from the old reference, 619 // update the field in the holder (`*field_addr`). 620 // 621 // Note that this field could also hold a different object, if 622 // another thread had concurrently changed it. In that case, the 623 // LOCK CMPXCHGL instruction in the compare-and-set (CAS) 624 // operation below would abort the CAS, leaving the field as-is. 625 NearLabel done; 626 __ cmpl(temp1_, ref_cpu_reg); 627 __ j(kEqual, &done); 628 629 // Update the holder's field atomically. This may fail if the 630 // mutator updates before us, but it's OK. This is achieved 631 // using a strong compare-and-set (CAS) operation with relaxed 632 // memory synchronization ordering, where the expected value is 633 // the old reference and the desired value is the new reference. 634 // This operation is implemented with a 32-bit LOCK CMPXCHG 635 // instruction, which requires the expected value (the old 636 // reference) to be in EAX. Save RAX beforehand, and move the 637 // expected value (stored in `temp1_`) into EAX. 638 __ movq(temp2_, CpuRegister(RAX)); 639 __ movl(CpuRegister(RAX), temp1_); 640 641 // Convenience aliases. 642 CpuRegister base = obj_; 643 CpuRegister expected = CpuRegister(RAX); 644 CpuRegister value = ref_cpu_reg; 645 646 bool base_equals_value = (base.AsRegister() == value.AsRegister()); 647 Register value_reg = ref_reg; 648 if (kPoisonHeapReferences) { 649 if (base_equals_value) { 650 // If `base` and `value` are the same register location, move 651 // `value_reg` to a temporary register. This way, poisoning 652 // `value_reg` won't invalidate `base`. 653 value_reg = temp1_.AsRegister(); 654 __ movl(CpuRegister(value_reg), base); 655 } 656 657 // Check that the register allocator did not assign the location 658 // of `expected` (RAX) to `value` nor to `base`, so that heap 659 // poisoning (when enabled) works as intended below. 660 // - If `value` were equal to `expected`, both references would 661 // be poisoned twice, meaning they would not be poisoned at 662 // all, as heap poisoning uses address negation. 663 // - If `base` were equal to `expected`, poisoning `expected` 664 // would invalidate `base`. 665 DCHECK_NE(value_reg, expected.AsRegister()); 666 DCHECK_NE(base.AsRegister(), expected.AsRegister()); 667 668 __ PoisonHeapReference(expected); 669 __ PoisonHeapReference(CpuRegister(value_reg)); 670 } 671 672 __ LockCmpxchgl(field_addr_, CpuRegister(value_reg)); 673 674 // If heap poisoning is enabled, we need to unpoison the values 675 // that were poisoned earlier. 676 if (kPoisonHeapReferences) { 677 if (base_equals_value) { 678 // `value_reg` has been moved to a temporary register, no need 679 // to unpoison it. 680 } else { 681 __ UnpoisonHeapReference(CpuRegister(value_reg)); 682 } 683 // No need to unpoison `expected` (RAX), as it will be overwritten below. 684 } 685 686 // Restore RAX. 687 __ movq(CpuRegister(RAX), temp2_); 688 689 __ Bind(&done); 690 __ jmp(GetExitLabel()); 691 } 692 693 private: 694 // The location (register) of the marked object reference. 695 const Location ref_; 696 // The register containing the object holding the marked object reference field. 697 const CpuRegister obj_; 698 // The address of the marked reference field. The base of this address must be `obj_`. 699 const Address field_addr_; 700 701 // Should the reference in `ref_` be unpoisoned prior to marking it? 702 const bool unpoison_ref_before_marking_; 703 704 const CpuRegister temp1_; 705 const CpuRegister temp2_; 706 707 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64); 708 }; 709 710 // Slow path generating a read barrier for a heap reference.
711 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { 712 public: 713 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction, 714 Location out, 715 Location ref, 716 Location obj, 717 uint32_t offset, 718 Location index) 719 : SlowPathCode(instruction), 720 out_(out), 721 ref_(ref), 722 obj_(obj), 723 offset_(offset), 724 index_(index) { 725 DCHECK(kEmitCompilerReadBarrier); 726 // If `obj` is equal to `out` or `ref`, it means the initial 727 // object has been overwritten by (or after) the heap object 728 // reference load to be instrumented, e.g.: 729 // 730 // __ movl(out, Address(out, offset)); 731 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); 732 // 733 // In that case, we have lost the information about the original 734 // object, and the emitted read barrier cannot work properly. 735 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; 736 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; 737 } 738 739 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 740 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 741 LocationSummary* locations = instruction_->GetLocations(); 742 CpuRegister reg_out = out_.AsRegister<CpuRegister>(); 743 DCHECK(locations->CanCall()); 744 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; 745 DCHECK(instruction_->IsInstanceFieldGet() || 746 instruction_->IsStaticFieldGet() || 747 instruction_->IsArrayGet() || 748 instruction_->IsInstanceOf() || 749 instruction_->IsCheckCast() || 750 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) 751 << "Unexpected instruction in read barrier for heap reference slow path: " 752 << instruction_->DebugName(); 753 754 __ Bind(GetEntryLabel()); 755 SaveLiveRegisters(codegen, locations); 756 757 // We may have to change the index's value, but as `index_` is a 758 // constant member (like other "inputs" of this slow path), 759 // introduce a copy of it, `index`. 760 Location index = index_; 761 if (index_.IsValid()) { 762 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. 763 if (instruction_->IsArrayGet()) { 764 // Compute real offset and store it in index_. 765 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister(); 766 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); 767 if (codegen->IsCoreCalleeSaveRegister(index_reg)) { 768 // We are about to change the value of `index_reg` (see the 769 // calls to art::x86_64::X86_64Assembler::shll and 770 // art::x86_64::X86_64Assembler::AddImmediate below), but it 771 // has not been saved by the previous call to 772 // art::SlowPathCode::SaveLiveRegisters, as it is a 773 // callee-save register -- 774 // art::SlowPathCode::SaveLiveRegisters does not consider 775 // callee-save registers, as it has been designed with the 776 // assumption that callee-save registers are supposed to be 777 // handled by the called function. So, as a callee-save 778 // register, `index_reg` _would_ eventually be saved onto 779 // the stack, but it would be too late: we would have 780 // changed its value earlier. Therefore, we manually save 781 // it here into another freely available register, 782 // `free_reg`, chosen of course among the caller-save 783 // registers (as a callee-save `free_reg` register would 784 // exhibit the same problem). 
785 // 786 // Note we could have requested a temporary register from 787 // the register allocator instead; but we prefer not to, as 788 // this is a slow path, and we know we can find a 789 // caller-save register that is available. 790 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister(); 791 __ movl(CpuRegister(free_reg), CpuRegister(index_reg)); 792 index_reg = free_reg; 793 index = Location::RegisterLocation(index_reg); 794 } else { 795 // The initial register stored in `index_` has already been 796 // saved in the call to art::SlowPathCode::SaveLiveRegisters 797 // (as it is not a callee-save register), so we can freely 798 // use it. 799 } 800 // Shifting the index value contained in `index_reg` by the 801 // scale factor (2) cannot overflow in practice, as the 802 // runtime is unable to allocate object arrays with a size 803 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes). 804 __ shll(CpuRegister(index_reg), Immediate(TIMES_4)); 805 static_assert( 806 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 807 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 808 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_)); 809 } else { 810 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile 811 // intrinsics, `index_` is not shifted by a scale factor of 2 812 // (as in the case of ArrayGet), as it is actually an offset 813 // to an object field within an object. 814 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); 815 DCHECK(instruction_->GetLocations()->Intrinsified()); 816 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || 817 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) 818 << instruction_->AsInvoke()->GetIntrinsic(); 819 DCHECK_EQ(offset_, 0U); 820 DCHECK(index_.IsRegister()); 821 } 822 } 823 824 // We're moving two or three locations to locations that could 825 // overlap, so we need a parallel move resolver. 
826 InvokeRuntimeCallingConvention calling_convention; 827 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); 828 parallel_move.AddMove(ref_, 829 Location::RegisterLocation(calling_convention.GetRegisterAt(0)), 830 DataType::Type::kReference, 831 nullptr); 832 parallel_move.AddMove(obj_, 833 Location::RegisterLocation(calling_convention.GetRegisterAt(1)), 834 DataType::Type::kReference, 835 nullptr); 836 if (index.IsValid()) { 837 parallel_move.AddMove(index, 838 Location::RegisterLocation(calling_convention.GetRegisterAt(2)), 839 DataType::Type::kInt32, 840 nullptr); 841 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); 842 } else { 843 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); 844 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_)); 845 } 846 x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow, 847 instruction_, 848 instruction_->GetDexPc(), 849 this); 850 CheckEntrypointTypes< 851 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); 852 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); 853 854 RestoreLiveRegisters(codegen, locations); 855 __ jmp(GetExitLabel()); 856 } 857 858 const char* GetDescription() const OVERRIDE { 859 return "ReadBarrierForHeapReferenceSlowPathX86_64"; 860 } 861 862 private: 863 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) { 864 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister()); 865 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister()); 866 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { 867 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { 868 return static_cast<CpuRegister>(i); 869 } 870 } 871 // We shall never fail to find a free caller-save register, as 872 // there are more than two core caller-save registers on x86-64 873 // (meaning it is possible to find one which is different from 874 // `ref` and `obj`). 875 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); 876 LOG(FATAL) << "Could not find a free caller-save register"; 877 UNREACHABLE(); 878 } 879 880 const Location out_; 881 const Location ref_; 882 const Location obj_; 883 const uint32_t offset_; 884 // An additional location containing an index to an array. 885 // Only used for HArrayGet and the UnsafeGetObject & 886 // UnsafeGetObjectVolatile intrinsics. 887 const Location index_; 888 889 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64); 890 }; 891 892 // Slow path generating a read barrier for a GC root.
893 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { 894 public: 895 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) 896 : SlowPathCode(instruction), out_(out), root_(root) { 897 DCHECK(kEmitCompilerReadBarrier); 898 } 899 900 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 901 LocationSummary* locations = instruction_->GetLocations(); 902 DCHECK(locations->CanCall()); 903 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); 904 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) 905 << "Unexpected instruction in read barrier for GC root slow path: " 906 << instruction_->DebugName(); 907 908 __ Bind(GetEntryLabel()); 909 SaveLiveRegisters(codegen, locations); 910 911 InvokeRuntimeCallingConvention calling_convention; 912 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); 913 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); 914 x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, 915 instruction_, 916 instruction_->GetDexPc(), 917 this); 918 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); 919 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); 920 921 RestoreLiveRegisters(codegen, locations); 922 __ jmp(GetExitLabel()); 923 } 924 925 const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } 926 927 private: 928 const Location out_; 929 const Location root_; 930 931 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64); 932 }; 933 934 #undef __ 935 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 936 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT 937 938 inline Condition X86_64IntegerCondition(IfCondition cond) { 939 switch (cond) { 940 case kCondEQ: return kEqual; 941 case kCondNE: return kNotEqual; 942 case kCondLT: return kLess; 943 case kCondLE: return kLessEqual; 944 case kCondGT: return kGreater; 945 case kCondGE: return kGreaterEqual; 946 case kCondB: return kBelow; 947 case kCondBE: return kBelowEqual; 948 case kCondA: return kAbove; 949 case kCondAE: return kAboveEqual; 950 } 951 LOG(FATAL) << "Unreachable"; 952 UNREACHABLE(); 953 } 954 955 // Maps FP condition to x86_64 name. 956 inline Condition X86_64FPCondition(IfCondition cond) { 957 switch (cond) { 958 case kCondEQ: return kEqual; 959 case kCondNE: return kNotEqual; 960 case kCondLT: return kBelow; 961 case kCondLE: return kBelowEqual; 962 case kCondGT: return kAbove; 963 case kCondGE: return kAboveEqual; 964 default: break; // should not happen 965 } 966 LOG(FATAL) << "Unreachable"; 967 UNREACHABLE(); 968 } 969 970 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( 971 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 972 HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { 973 return desired_dispatch_info; 974 } 975 976 void CodeGeneratorX86_64::GenerateStaticOrDirectCall( 977 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { 978 // All registers are assumed to be correctly set up. 979 980 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
981 switch (invoke->GetMethodLoadKind()) { 982 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { 983 // temp = thread->string_init_entrypoint 984 uint32_t offset = 985 GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); 986 __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true)); 987 break; 988 } 989 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: 990 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); 991 break; 992 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: 993 DCHECK(GetCompilerOptions().IsBootImage()); 994 __ leal(temp.AsRegister<CpuRegister>(), 995 Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); 996 RecordBootImageMethodPatch(invoke); 997 break; 998 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: 999 Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); 1000 break; 1001 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { 1002 __ movq(temp.AsRegister<CpuRegister>(), 1003 Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); 1004 RecordMethodBssEntryPatch(invoke); 1005 break; 1006 } 1007 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { 1008 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); 1009 return; // No code pointer retrieval; the runtime performs the call directly. 1010 } 1011 } 1012 1013 switch (invoke->GetCodePtrLocation()) { 1014 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: 1015 __ call(&frame_entry_label_); 1016 break; 1017 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: 1018 // (callee_method + offset_of_quick_compiled_code)() 1019 __ call(Address(callee_method.AsRegister<CpuRegister>(), 1020 ArtMethod::EntryPointFromQuickCompiledCodeOffset( 1021 kX86_64PointerSize).SizeValue())); 1022 break; 1023 } 1024 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 1025 1026 DCHECK(!IsLeafMethod()); 1027 } 1028 1029 void CodeGeneratorX86_64::GenerateVirtualCall( 1030 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { 1031 CpuRegister temp = temp_in.AsRegister<CpuRegister>(); 1032 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 1033 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue(); 1034 1035 // Use the calling convention instead of the location of the receiver, as 1036 // intrinsics may have put the receiver in a different register. In the intrinsics 1037 // slow path, the arguments have been moved to the right place, so here we are 1038 // guaranteed that the receiver is the first register of the calling convention. 1039 InvokeDexCallingConvention calling_convention; 1040 Register receiver = calling_convention.GetRegisterAt(0); 1041 1042 size_t class_offset = mirror::Object::ClassOffset().SizeValue(); 1043 // /* HeapReference<Class> */ temp = receiver->klass_ 1044 __ movl(temp, Address(CpuRegister(receiver), class_offset)); 1045 MaybeRecordImplicitNullCheck(invoke); 1046 // Instead of simply (possibly) unpoisoning `temp` here, we should 1047 // emit a read barrier for the previous class reference load. 1048 // However this is not required in practice, as this is an 1049 // intermediate/temporary reference and because the current 1050 // concurrent copying collector keeps the from-space memory 1051 // intact/accessible until the end of the marking phase (the 1052 // concurrent copying collector may not in the future). 
1053 __ MaybeUnpoisonHeapReference(temp); 1054 // temp = temp->GetMethodAt(method_offset); 1055 __ movq(temp, Address(temp, method_offset)); 1056 // call temp->GetEntryPoint(); 1057 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( 1058 kX86_64PointerSize).SizeValue())); 1059 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 1060 } 1061 1062 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { 1063 boot_image_method_patches_.emplace_back( 1064 invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); 1065 __ Bind(&boot_image_method_patches_.back().label); 1066 } 1067 1068 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) { 1069 method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); 1070 __ Bind(&method_bss_entry_patches_.back().label); 1071 } 1072 1073 void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) { 1074 boot_image_type_patches_.emplace_back( 1075 &load_class->GetDexFile(), load_class->GetTypeIndex().index_); 1076 __ Bind(&boot_image_type_patches_.back().label); 1077 } 1078 1079 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { 1080 type_bss_entry_patches_.emplace_back( 1081 &load_class->GetDexFile(), load_class->GetTypeIndex().index_); 1082 return &type_bss_entry_patches_.back().label; 1083 } 1084 1085 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) { 1086 boot_image_string_patches_.emplace_back( 1087 &load_string->GetDexFile(), load_string->GetStringIndex().index_); 1088 __ Bind(&boot_image_string_patches_.back().label); 1089 } 1090 1091 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { 1092 DCHECK(!GetCompilerOptions().IsBootImage()); 1093 string_bss_entry_patches_.emplace_back( 1094 &load_string->GetDexFile(), load_string->GetStringIndex().index_); 1095 return &string_bss_entry_patches_.back().label; 1096 } 1097 1098 // The label points to the end of the "movl" or another instruction but the literal offset 1099 // for method patch needs to point to the embedded constant which occupies the last 4 bytes. 
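// For instance, in a RIP-relative "movl reg, [rip + disp32]" or "leal reg, [rip + disp32]" the 32-bit displacement is the final 4 bytes of the encoding, so the patch offset is the bound label position minus 4, hence the adjustment constant below.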
1100 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; 1101 1102 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> 1103 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( 1104 const ArenaDeque<PatchInfo<Label>>& infos, 1105 ArenaVector<linker::LinkerPatch>* linker_patches) { 1106 for (const PatchInfo<Label>& info : infos) { 1107 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; 1108 linker_patches->push_back( 1109 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index)); 1110 } 1111 } 1112 1113 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { 1114 DCHECK(linker_patches->empty()); 1115 size_t size = 1116 boot_image_method_patches_.size() + 1117 method_bss_entry_patches_.size() + 1118 boot_image_type_patches_.size() + 1119 type_bss_entry_patches_.size() + 1120 boot_image_string_patches_.size() + 1121 string_bss_entry_patches_.size(); 1122 linker_patches->reserve(size); 1123 if (GetCompilerOptions().IsBootImage()) { 1124 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( 1125 boot_image_method_patches_, linker_patches); 1126 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( 1127 boot_image_type_patches_, linker_patches); 1128 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( 1129 boot_image_string_patches_, linker_patches); 1130 } else { 1131 DCHECK(boot_image_method_patches_.empty()); 1132 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( 1133 boot_image_type_patches_, linker_patches); 1134 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( 1135 boot_image_string_patches_, linker_patches); 1136 } 1137 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( 1138 method_bss_entry_patches_, linker_patches); 1139 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( 1140 type_bss_entry_patches_, linker_patches); 1141 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( 1142 string_bss_entry_patches_, linker_patches); 1143 DCHECK_EQ(size, linker_patches->size()); 1144 } 1145 1146 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { 1147 stream << Register(reg); 1148 } 1149 1150 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { 1151 stream << FloatRegister(reg); 1152 } 1153 1154 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { 1155 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id)); 1156 return kX86_64WordSize; 1157 } 1158 1159 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { 1160 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index)); 1161 return kX86_64WordSize; 1162 } 1163 1164 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 1165 if (GetGraph()->HasSIMD()) { 1166 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); 1167 } else { 1168 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); 1169 } 1170 return GetFloatingPointSpillSlotSize(); 1171 } 1172 1173 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 1174 if (GetGraph()->HasSIMD()) { 1175 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); 1176 } else { 1177 __ movsd(XmmRegister(reg_id), 
Address(CpuRegister(RSP), stack_index)); 1178 } 1179 return GetFloatingPointSpillSlotSize(); 1180 } 1181 1182 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint, 1183 HInstruction* instruction, 1184 uint32_t dex_pc, 1185 SlowPathCode* slow_path) { 1186 ValidateInvokeRuntime(entrypoint, instruction, slow_path); 1187 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value()); 1188 if (EntrypointRequiresStackMap(entrypoint)) { 1189 RecordPcInfo(instruction, dex_pc, slow_path); 1190 } 1191 } 1192 1193 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, 1194 HInstruction* instruction, 1195 SlowPathCode* slow_path) { 1196 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); 1197 GenerateInvokeRuntime(entry_point_offset); 1198 } 1199 1200 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { 1201 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true)); 1202 } 1203 1204 static constexpr int kNumberOfCpuRegisterPairs = 0; 1205 // Use a fake return address register to mimic Quick. 1206 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); 1207 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, 1208 const X86_64InstructionSetFeatures& isa_features, 1209 const CompilerOptions& compiler_options, 1210 OptimizingCompilerStats* stats) 1211 : CodeGenerator(graph, 1212 kNumberOfCpuRegisters, 1213 kNumberOfFloatRegisters, 1214 kNumberOfCpuRegisterPairs, 1215 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), 1216 arraysize(kCoreCalleeSaves)) 1217 | (1 << kFakeReturnRegister), 1218 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), 1219 arraysize(kFpuCalleeSaves)), 1220 compiler_options, 1221 stats), 1222 block_labels_(nullptr), 1223 location_builder_(graph, this), 1224 instruction_visitor_(graph, this), 1225 move_resolver_(graph->GetAllocator(), this), 1226 assembler_(graph->GetAllocator()), 1227 isa_features_(isa_features), 1228 constant_area_start_(0), 1229 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1230 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1231 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1232 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1233 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1234 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1235 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1236 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1237 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { 1238 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); 1239 } 1240 1241 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, 1242 CodeGeneratorX86_64* codegen) 1243 : InstructionCodeGenerator(graph, codegen), 1244 assembler_(codegen->GetAssembler()), 1245 codegen_(codegen) {} 1246 1247 void CodeGeneratorX86_64::SetupBlockedRegisters() const { 1248 // Stack register is always reserved. 1249 blocked_core_registers_[RSP] = true; 1250 1251 // Block the register used as TMP. 
1252 blocked_core_registers_[TMP] = true; 1253 } 1254 1255 static dwarf::Reg DWARFReg(Register reg) { 1256 return dwarf::Reg::X86_64Core(static_cast<int>(reg)); 1257 } 1258 1259 static dwarf::Reg DWARFReg(FloatRegister reg) { 1260 return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); 1261 } 1262 1263 void CodeGeneratorX86_64::GenerateFrameEntry() { 1264 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address 1265 __ Bind(&frame_entry_label_); 1266 bool skip_overflow_check = IsLeafMethod() 1267 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); 1268 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); 1269 1270 if (GetCompilerOptions().CountHotnessInCompiledCode()) { 1271 __ addw(Address(CpuRegister(kMethodRegisterArgument), 1272 ArtMethod::HotnessCountOffset().Int32Value()), 1273 Immediate(1)); 1274 } 1275 1276 if (!skip_overflow_check) { 1277 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64); 1278 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes))); 1279 RecordPcInfo(nullptr, 0); 1280 } 1281 1282 if (HasEmptyFrame()) { 1283 return; 1284 } 1285 1286 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { 1287 Register reg = kCoreCalleeSaves[i]; 1288 if (allocated_registers_.ContainsCoreRegister(reg)) { 1289 __ pushq(CpuRegister(reg)); 1290 __ cfi().AdjustCFAOffset(kX86_64WordSize); 1291 __ cfi().RelOffset(DWARFReg(reg), 0); 1292 } 1293 } 1294 1295 int adjust = GetFrameSize() - GetCoreSpillSize(); 1296 __ subq(CpuRegister(RSP), Immediate(adjust)); 1297 __ cfi().AdjustCFAOffset(adjust); 1298 uint32_t xmm_spill_location = GetFpuSpillStart(); 1299 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); 1300 1301 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { 1302 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { 1303 int offset = xmm_spill_location + (xmm_spill_slot_size * i); 1304 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); 1305 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); 1306 } 1307 } 1308 1309 // Save the current method if we need it. Note that we do not 1310 // do this in HCurrentMethod, as the instruction might have been removed 1311 // in the SSA graph. 1312 if (RequiresCurrentMethod()) { 1313 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), 1314 CpuRegister(kMethodRegisterArgument)); 1315 } 1316 1317 if (GetGraph()->HasShouldDeoptimizeFlag()) { 1318 // Initialize should_deoptimize flag to 0. 
1319 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); 1320 } 1321 } 1322 1323 void CodeGeneratorX86_64::GenerateFrameExit() { 1324 __ cfi().RememberState(); 1325 if (!HasEmptyFrame()) { 1326 uint32_t xmm_spill_location = GetFpuSpillStart(); 1327 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); 1328 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { 1329 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { 1330 int offset = xmm_spill_location + (xmm_spill_slot_size * i); 1331 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset)); 1332 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i])); 1333 } 1334 } 1335 1336 int adjust = GetFrameSize() - GetCoreSpillSize(); 1337 __ addq(CpuRegister(RSP), Immediate(adjust)); 1338 __ cfi().AdjustCFAOffset(-adjust); 1339 1340 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { 1341 Register reg = kCoreCalleeSaves[i]; 1342 if (allocated_registers_.ContainsCoreRegister(reg)) { 1343 __ popq(CpuRegister(reg)); 1344 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); 1345 __ cfi().Restore(DWARFReg(reg)); 1346 } 1347 } 1348 } 1349 __ ret(); 1350 __ cfi().RestoreState(); 1351 __ cfi().DefCFAOffset(GetFrameSize()); 1352 } 1353 1354 void CodeGeneratorX86_64::Bind(HBasicBlock* block) { 1355 __ Bind(GetLabelOf(block)); 1356 } 1357 1358 void CodeGeneratorX86_64::Move(Location destination, Location source) { 1359 if (source.Equals(destination)) { 1360 return; 1361 } 1362 if (destination.IsRegister()) { 1363 CpuRegister dest = destination.AsRegister<CpuRegister>(); 1364 if (source.IsRegister()) { 1365 __ movq(dest, source.AsRegister<CpuRegister>()); 1366 } else if (source.IsFpuRegister()) { 1367 __ movd(dest, source.AsFpuRegister<XmmRegister>()); 1368 } else if (source.IsStackSlot()) { 1369 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1370 } else if (source.IsConstant()) { 1371 HConstant* constant = source.GetConstant(); 1372 if (constant->IsLongConstant()) { 1373 Load64BitValue(dest, constant->AsLongConstant()->GetValue()); 1374 } else { 1375 Load32BitValue(dest, GetInt32ValueOf(constant)); 1376 } 1377 } else { 1378 DCHECK(source.IsDoubleStackSlot()); 1379 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1380 } 1381 } else if (destination.IsFpuRegister()) { 1382 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 1383 if (source.IsRegister()) { 1384 __ movd(dest, source.AsRegister<CpuRegister>()); 1385 } else if (source.IsFpuRegister()) { 1386 __ movaps(dest, source.AsFpuRegister<XmmRegister>()); 1387 } else if (source.IsConstant()) { 1388 HConstant* constant = source.GetConstant(); 1389 int64_t value = CodeGenerator::GetInt64ValueOf(constant); 1390 if (constant->IsFloatConstant()) { 1391 Load32BitValue(dest, static_cast<int32_t>(value)); 1392 } else { 1393 Load64BitValue(dest, value); 1394 } 1395 } else if (source.IsStackSlot()) { 1396 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1397 } else { 1398 DCHECK(source.IsDoubleStackSlot()); 1399 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1400 } 1401 } else if (destination.IsStackSlot()) { 1402 if (source.IsRegister()) { 1403 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), 1404 source.AsRegister<CpuRegister>()); 1405 } else if (source.IsFpuRegister()) { 1406 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), 1407 source.AsFpuRegister<XmmRegister>()); 1408 } else if (source.IsConstant()) { 
1409 HConstant* constant = source.GetConstant(); 1410 int32_t value = GetInt32ValueOf(constant); 1411 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); 1412 } else { 1413 DCHECK(source.IsStackSlot()) << source; 1414 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 1415 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 1416 } 1417 } else { 1418 DCHECK(destination.IsDoubleStackSlot()); 1419 if (source.IsRegister()) { 1420 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), 1421 source.AsRegister<CpuRegister>()); 1422 } else if (source.IsFpuRegister()) { 1423 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), 1424 source.AsFpuRegister<XmmRegister>()); 1425 } else if (source.IsConstant()) { 1426 HConstant* constant = source.GetConstant(); 1427 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant()); 1428 int64_t value = GetInt64ValueOf(constant); 1429 Store64BitValueToStack(destination, value); 1430 } else { 1431 DCHECK(source.IsDoubleStackSlot()); 1432 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 1433 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 1434 } 1435 } 1436 } 1437 1438 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { 1439 DCHECK(location.IsRegister()); 1440 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value)); 1441 } 1442 1443 void CodeGeneratorX86_64::MoveLocation( 1444 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) { 1445 Move(dst, src); 1446 } 1447 1448 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) { 1449 if (location.IsRegister()) { 1450 locations->AddTemp(location); 1451 } else { 1452 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; 1453 } 1454 } 1455 1456 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) { 1457 if (successor->IsExitBlock()) { 1458 DCHECK(got->GetPrevious()->AlwaysThrows()); 1459 return; // no code needed 1460 } 1461 1462 HBasicBlock* block = got->GetBlock(); 1463 HInstruction* previous = got->GetPrevious(); 1464 1465 HLoopInformation* info = block->GetLoopInformation(); 1466 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 1467 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { 1468 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0)); 1469 __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()), 1470 Immediate(1)); 1471 } 1472 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 1473 return; 1474 } 1475 1476 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 1477 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 1478 } 1479 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { 1480 __ jmp(codegen_->GetLabelOf(successor)); 1481 } 1482 } 1483 1484 void LocationsBuilderX86_64::VisitGoto(HGoto* got) { 1485 got->SetLocations(nullptr); 1486 } 1487 1488 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { 1489 HandleGoto(got, got->GetSuccessor()); 1490 } 1491 1492 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) { 1493 try_boundary->SetLocations(nullptr); 1494 } 1495 1496 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) { 1497 HBasicBlock* successor = 
try_boundary->GetNormalFlowSuccessor(); 1498 if (!successor->IsExitBlock()) { 1499 HandleGoto(try_boundary, successor); 1500 } 1501 } 1502 1503 void LocationsBuilderX86_64::VisitExit(HExit* exit) { 1504 exit->SetLocations(nullptr); 1505 } 1506 1507 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 1508 } 1509 1510 template<class LabelType> 1511 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, 1512 LabelType* true_label, 1513 LabelType* false_label) { 1514 if (cond->IsFPConditionTrueIfNaN()) { 1515 __ j(kUnordered, true_label); 1516 } else if (cond->IsFPConditionFalseIfNaN()) { 1517 __ j(kUnordered, false_label); 1518 } 1519 __ j(X86_64FPCondition(cond->GetCondition()), true_label); 1520 } 1521 1522 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) { 1523 LocationSummary* locations = condition->GetLocations(); 1524 1525 Location left = locations->InAt(0); 1526 Location right = locations->InAt(1); 1527 DataType::Type type = condition->InputAt(0)->GetType(); 1528 switch (type) { 1529 case DataType::Type::kBool: 1530 case DataType::Type::kUint8: 1531 case DataType::Type::kInt8: 1532 case DataType::Type::kUint16: 1533 case DataType::Type::kInt16: 1534 case DataType::Type::kInt32: 1535 case DataType::Type::kReference: { 1536 codegen_->GenerateIntCompare(left, right); 1537 break; 1538 } 1539 case DataType::Type::kInt64: { 1540 codegen_->GenerateLongCompare(left, right); 1541 break; 1542 } 1543 case DataType::Type::kFloat32: { 1544 if (right.IsFpuRegister()) { 1545 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); 1546 } else if (right.IsConstant()) { 1547 __ ucomiss(left.AsFpuRegister<XmmRegister>(), 1548 codegen_->LiteralFloatAddress( 1549 right.GetConstant()->AsFloatConstant()->GetValue())); 1550 } else { 1551 DCHECK(right.IsStackSlot()); 1552 __ ucomiss(left.AsFpuRegister<XmmRegister>(), 1553 Address(CpuRegister(RSP), right.GetStackIndex())); 1554 } 1555 break; 1556 } 1557 case DataType::Type::kFloat64: { 1558 if (right.IsFpuRegister()) { 1559 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); 1560 } else if (right.IsConstant()) { 1561 __ ucomisd(left.AsFpuRegister<XmmRegister>(), 1562 codegen_->LiteralDoubleAddress( 1563 right.GetConstant()->AsDoubleConstant()->GetValue())); 1564 } else { 1565 DCHECK(right.IsDoubleStackSlot()); 1566 __ ucomisd(left.AsFpuRegister<XmmRegister>(), 1567 Address(CpuRegister(RSP), right.GetStackIndex())); 1568 } 1569 break; 1570 } 1571 default: 1572 LOG(FATAL) << "Unexpected condition type " << type; 1573 } 1574 } 1575 1576 template<class LabelType> 1577 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, 1578 LabelType* true_target_in, 1579 LabelType* false_target_in) { 1580 // Generated branching requires both targets to be explicit. If either of the 1581 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. 1582 LabelType fallthrough_target; 1583 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; 1584 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; 1585 1586 // Generate the comparison to set the CC. 1587 GenerateCompareTest(condition); 1588 1589 // Now generate the correct jump(s). 
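  // Illustrative note: ucomis{s,d} reports an unordered result (either operand NaN) as
  // ZF=PF=CF=1, so the FP cases below let GenerateFPJumps peel off the NaN outcome with a
  // kUnordered jump before testing the ordered condition. E.g. for `a < b` under gt bias:
  //   ucomiss a, b
  //   j(kUnordered, false_target)   // NaN makes '<' false
  //   j(kBelow, true_target)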
1590 DataType::Type type = condition->InputAt(0)->GetType(); 1591 switch (type) { 1592 case DataType::Type::kInt64: { 1593 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); 1594 break; 1595 } 1596 case DataType::Type::kFloat32: { 1597 GenerateFPJumps(condition, true_target, false_target); 1598 break; 1599 } 1600 case DataType::Type::kFloat64: { 1601 GenerateFPJumps(condition, true_target, false_target); 1602 break; 1603 } 1604 default: 1605 LOG(FATAL) << "Unexpected condition type " << type; 1606 } 1607 1608 if (false_target != &fallthrough_target) { 1609 __ jmp(false_target); 1610 } 1611 1612 if (fallthrough_target.IsLinked()) { 1613 __ Bind(&fallthrough_target); 1614 } 1615 } 1616 1617 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { 1618 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS 1619 // are set only strictly before `branch`. We can't use the eflags on long 1620 // conditions if they are materialized due to the complex branching. 1621 return cond->IsCondition() && 1622 cond->GetNext() == branch && 1623 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); 1624 } 1625 1626 template<class LabelType> 1627 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, 1628 size_t condition_input_index, 1629 LabelType* true_target, 1630 LabelType* false_target) { 1631 HInstruction* cond = instruction->InputAt(condition_input_index); 1632 1633 if (true_target == nullptr && false_target == nullptr) { 1634 // Nothing to do. The code always falls through. 1635 return; 1636 } else if (cond->IsIntConstant()) { 1637 // Constant condition, statically compared against "true" (integer value 1). 1638 if (cond->AsIntConstant()->IsTrue()) { 1639 if (true_target != nullptr) { 1640 __ jmp(true_target); 1641 } 1642 } else { 1643 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 1644 if (false_target != nullptr) { 1645 __ jmp(false_target); 1646 } 1647 } 1648 return; 1649 } 1650 1651 // The following code generates these patterns: 1652 // (1) true_target == nullptr && false_target != nullptr 1653 // - opposite condition true => branch to false_target 1654 // (2) true_target != nullptr && false_target == nullptr 1655 // - condition true => branch to true_target 1656 // (3) true_target != nullptr && false_target != nullptr 1657 // - condition true => branch to true_target 1658 // - branch to false_target 1659 if (IsBooleanValueOrMaterializedCondition(cond)) { 1660 if (AreEflagsSetFrom(cond, instruction)) { 1661 if (true_target == nullptr) { 1662 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target); 1663 } else { 1664 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); 1665 } 1666 } else { 1667 // Materialized condition, compare against 0. 1668 Location lhs = instruction->GetLocations()->InAt(condition_input_index); 1669 if (lhs.IsRegister()) { 1670 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); 1671 } else { 1672 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); 1673 } 1674 if (true_target == nullptr) { 1675 __ j(kEqual, false_target); 1676 } else { 1677 __ j(kNotEqual, true_target); 1678 } 1679 } 1680 } else { 1681 // Condition has not been materialized, use its inputs as the 1682 // comparison and its condition as the branch condition. 
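    // For example, for an int condition emitted at its use site such as `if (a < b)`, the
    // code below fuses the compare and the branch instead of materializing a boolean first:
    //   cmpl a, b                  (GenerateIntCompare)
    //   j(kLess, true_target)      (or j(kGreaterEqual, false_target) when only the false
    //                               edge needs an explicit jump)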
    HCondition* condition = cond->AsCondition();

    // If this is a long or FP comparison that has been folded into
    // the HCondition, generate the comparison directly.
    DataType::Type type = condition->InputAt(0)->GetType();
    if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
      GenerateCompareTestAndBranch(condition, true_target, false_target);
      return;
    }

    Location lhs = condition->GetLocations()->InAt(0);
    Location rhs = condition->GetLocations()->InAt(1);
    codegen_->GenerateIntCompare(lhs, rhs);
    if (true_target == nullptr) {
      __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
    } else {
      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
    }
  }

  // If neither branch falls through (case 3), the conditional branch to `true_target`
  // was already emitted (case 2) and we need to emit a jump to `false_target`.
  if (true_target != nullptr && false_target != nullptr) {
    __ jmp(false_target);
  }
}

void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
    locations->SetInAt(0, Location::Any());
  }
}

void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
      nullptr : codegen_->GetLabelOf(true_successor);
  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1723 nullptr : codegen_->GetLabelOf(false_successor); 1724 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); 1725 } 1726 1727 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { 1728 LocationSummary* locations = new (GetGraph()->GetAllocator()) 1729 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 1730 InvokeRuntimeCallingConvention calling_convention; 1731 RegisterSet caller_saves = RegisterSet::Empty(); 1732 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1733 locations->SetCustomSlowPathCallerSaves(caller_saves); 1734 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 1735 locations->SetInAt(0, Location::Any()); 1736 } 1737 } 1738 1739 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { 1740 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); 1741 GenerateTestAndBranch<Label>(deoptimize, 1742 /* condition_input_index */ 0, 1743 slow_path->GetEntryLabel(), 1744 /* false_target */ nullptr); 1745 } 1746 1747 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1748 LocationSummary* locations = new (GetGraph()->GetAllocator()) 1749 LocationSummary(flag, LocationSummary::kNoCall); 1750 locations->SetOut(Location::RequiresRegister()); 1751 } 1752 1753 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1754 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(), 1755 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 1756 } 1757 1758 static bool SelectCanUseCMOV(HSelect* select) { 1759 // There are no conditional move instructions for XMMs. 1760 if (DataType::IsFloatingPointType(select->GetType())) { 1761 return false; 1762 } 1763 1764 // A FP condition doesn't generate the single CC that we need. 1765 HInstruction* condition = select->GetCondition(); 1766 if (condition->IsCondition() && 1767 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) { 1768 return false; 1769 } 1770 1771 // We can generate a CMOV for this Select. 1772 return true; 1773 } 1774 1775 void LocationsBuilderX86_64::VisitSelect(HSelect* select) { 1776 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); 1777 if (DataType::IsFloatingPointType(select->GetType())) { 1778 locations->SetInAt(0, Location::RequiresFpuRegister()); 1779 locations->SetInAt(1, Location::Any()); 1780 } else { 1781 locations->SetInAt(0, Location::RequiresRegister()); 1782 if (SelectCanUseCMOV(select)) { 1783 if (select->InputAt(1)->IsConstant()) { 1784 locations->SetInAt(1, Location::RequiresRegister()); 1785 } else { 1786 locations->SetInAt(1, Location::Any()); 1787 } 1788 } else { 1789 locations->SetInAt(1, Location::Any()); 1790 } 1791 } 1792 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 1793 locations->SetInAt(2, Location::RequiresRegister()); 1794 } 1795 locations->SetOut(Location::SameAsFirstInput()); 1796 } 1797 1798 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { 1799 LocationSummary* locations = select->GetLocations(); 1800 if (SelectCanUseCMOV(select)) { 1801 // If both the condition and the source types are integer, we can generate 1802 // a CMOV to implement Select. 
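    // Illustrative sketch: the CMOV lowering below implements `out = cond ? true_val : false_val`
    // without branches, roughly as
    //   out = false_val              // out is the same register as InAt(0)
    //   test/cmp ...                 // set EFLAGS from the condition
    //   cmov<cond> out, true_val     // overwrite only when the condition holds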
1803 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>(); 1804 Location value_true_loc = locations->InAt(1); 1805 DCHECK(locations->InAt(0).Equals(locations->Out())); 1806 1807 HInstruction* select_condition = select->GetCondition(); 1808 Condition cond = kNotEqual; 1809 1810 // Figure out how to test the 'condition'. 1811 if (select_condition->IsCondition()) { 1812 HCondition* condition = select_condition->AsCondition(); 1813 if (!condition->IsEmittedAtUseSite()) { 1814 // This was a previously materialized condition. 1815 // Can we use the existing condition code? 1816 if (AreEflagsSetFrom(condition, select)) { 1817 // Materialization was the previous instruction. Condition codes are right. 1818 cond = X86_64IntegerCondition(condition->GetCondition()); 1819 } else { 1820 // No, we have to recreate the condition code. 1821 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); 1822 __ testl(cond_reg, cond_reg); 1823 } 1824 } else { 1825 GenerateCompareTest(condition); 1826 cond = X86_64IntegerCondition(condition->GetCondition()); 1827 } 1828 } else { 1829 // Must be a Boolean condition, which needs to be compared to 0. 1830 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); 1831 __ testl(cond_reg, cond_reg); 1832 } 1833 1834 // If the condition is true, overwrite the output, which already contains false. 1835 // Generate the correct sized CMOV. 1836 bool is_64_bit = DataType::Is64BitType(select->GetType()); 1837 if (value_true_loc.IsRegister()) { 1838 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit); 1839 } else { 1840 __ cmov(cond, 1841 value_false, 1842 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit); 1843 } 1844 } else { 1845 NearLabel false_target; 1846 GenerateTestAndBranch<NearLabel>(select, 1847 /* condition_input_index */ 2, 1848 /* true_target */ nullptr, 1849 &false_target); 1850 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); 1851 __ Bind(&false_target); 1852 } 1853 } 1854 1855 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 1856 new (GetGraph()->GetAllocator()) LocationSummary(info); 1857 } 1858 1859 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) { 1860 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 1861 } 1862 1863 void CodeGeneratorX86_64::GenerateNop() { 1864 __ nop(); 1865 } 1866 1867 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) { 1868 LocationSummary* locations = 1869 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); 1870 // Handle the long/FP comparisons made in instruction simplification. 
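  // Illustrative example: instruction simplification may fold an HCompare of long/float/double
  // operands followed by an int comparison into a single HCondition, so `if (x > y)` on longs
  // can reach this point as an HGreaterThan with kInt64 inputs; hence the type switch below.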
1871 switch (cond->InputAt(0)->GetType()) { 1872 case DataType::Type::kInt64: 1873 locations->SetInAt(0, Location::RequiresRegister()); 1874 locations->SetInAt(1, Location::Any()); 1875 break; 1876 case DataType::Type::kFloat32: 1877 case DataType::Type::kFloat64: 1878 locations->SetInAt(0, Location::RequiresFpuRegister()); 1879 locations->SetInAt(1, Location::Any()); 1880 break; 1881 default: 1882 locations->SetInAt(0, Location::RequiresRegister()); 1883 locations->SetInAt(1, Location::Any()); 1884 break; 1885 } 1886 if (!cond->IsEmittedAtUseSite()) { 1887 locations->SetOut(Location::RequiresRegister()); 1888 } 1889 } 1890 1891 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { 1892 if (cond->IsEmittedAtUseSite()) { 1893 return; 1894 } 1895 1896 LocationSummary* locations = cond->GetLocations(); 1897 Location lhs = locations->InAt(0); 1898 Location rhs = locations->InAt(1); 1899 CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); 1900 NearLabel true_label, false_label; 1901 1902 switch (cond->InputAt(0)->GetType()) { 1903 default: 1904 // Integer case. 1905 1906 // Clear output register: setcc only sets the low byte. 1907 __ xorl(reg, reg); 1908 1909 codegen_->GenerateIntCompare(lhs, rhs); 1910 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); 1911 return; 1912 case DataType::Type::kInt64: 1913 // Clear output register: setcc only sets the low byte. 1914 __ xorl(reg, reg); 1915 1916 codegen_->GenerateLongCompare(lhs, rhs); 1917 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); 1918 return; 1919 case DataType::Type::kFloat32: { 1920 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); 1921 if (rhs.IsConstant()) { 1922 float value = rhs.GetConstant()->AsFloatConstant()->GetValue(); 1923 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value)); 1924 } else if (rhs.IsStackSlot()) { 1925 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 1926 } else { 1927 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>()); 1928 } 1929 GenerateFPJumps(cond, &true_label, &false_label); 1930 break; 1931 } 1932 case DataType::Type::kFloat64: { 1933 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); 1934 if (rhs.IsConstant()) { 1935 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue(); 1936 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value)); 1937 } else if (rhs.IsDoubleStackSlot()) { 1938 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 1939 } else { 1940 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>()); 1941 } 1942 GenerateFPJumps(cond, &true_label, &false_label); 1943 break; 1944 } 1945 } 1946 1947 // Convert the jumps into the result. 1948 NearLabel done_label; 1949 1950 // False case: result = 0. 1951 __ Bind(&false_label); 1952 __ xorl(reg, reg); 1953 __ jmp(&done_label); 1954 1955 // True case: result = 1. 
1956 __ Bind(&true_label); 1957 __ movl(reg, Immediate(1)); 1958 __ Bind(&done_label); 1959 } 1960 1961 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) { 1962 HandleCondition(comp); 1963 } 1964 1965 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) { 1966 HandleCondition(comp); 1967 } 1968 1969 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) { 1970 HandleCondition(comp); 1971 } 1972 1973 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) { 1974 HandleCondition(comp); 1975 } 1976 1977 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) { 1978 HandleCondition(comp); 1979 } 1980 1981 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) { 1982 HandleCondition(comp); 1983 } 1984 1985 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 1986 HandleCondition(comp); 1987 } 1988 1989 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 1990 HandleCondition(comp); 1991 } 1992 1993 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) { 1994 HandleCondition(comp); 1995 } 1996 1997 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) { 1998 HandleCondition(comp); 1999 } 2000 2001 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 2002 HandleCondition(comp); 2003 } 2004 2005 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 2006 HandleCondition(comp); 2007 } 2008 2009 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) { 2010 HandleCondition(comp); 2011 } 2012 2013 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) { 2014 HandleCondition(comp); 2015 } 2016 2017 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { 2018 HandleCondition(comp); 2019 } 2020 2021 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { 2022 HandleCondition(comp); 2023 } 2024 2025 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) { 2026 HandleCondition(comp); 2027 } 2028 2029 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) { 2030 HandleCondition(comp); 2031 } 2032 2033 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { 2034 HandleCondition(comp); 2035 } 2036 2037 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { 2038 HandleCondition(comp); 2039 } 2040 2041 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { 2042 LocationSummary* locations = 2043 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); 2044 switch (compare->InputAt(0)->GetType()) { 2045 case DataType::Type::kBool: 2046 case DataType::Type::kUint8: 2047 case DataType::Type::kInt8: 2048 case DataType::Type::kUint16: 2049 case DataType::Type::kInt16: 2050 case DataType::Type::kInt32: 2051 case DataType::Type::kInt64: { 2052 locations->SetInAt(0, Location::RequiresRegister()); 2053 locations->SetInAt(1, Location::Any()); 2054 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2055 break; 2056 } 2057 case DataType::Type::kFloat32: 2058 case DataType::Type::kFloat64: { 2059 locations->SetInAt(0, Location::RequiresFpuRegister()); 2060 locations->SetInAt(1, Location::Any()); 2061 locations->SetOut(Location::RequiresRegister()); 2062 break; 2063 } 2064 default: 2065 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); 2066 } 2067 } 2068 2069 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { 2070 
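  // Illustrative note: HCompare follows the Java cmp/cmpl/cmpg semantics,
  //   result = -1 if left < right, 0 if left == right, 1 if left > right,
  // and for float/double a NaN operand yields 1 under gt bias (cmpg) or -1 under lt bias
  // (cmpl), which is what the kUnordered jumps below select.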
LocationSummary* locations = compare->GetLocations(); 2071 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2072 Location left = locations->InAt(0); 2073 Location right = locations->InAt(1); 2074 2075 NearLabel less, greater, done; 2076 DataType::Type type = compare->InputAt(0)->GetType(); 2077 Condition less_cond = kLess; 2078 2079 switch (type) { 2080 case DataType::Type::kBool: 2081 case DataType::Type::kUint8: 2082 case DataType::Type::kInt8: 2083 case DataType::Type::kUint16: 2084 case DataType::Type::kInt16: 2085 case DataType::Type::kInt32: { 2086 codegen_->GenerateIntCompare(left, right); 2087 break; 2088 } 2089 case DataType::Type::kInt64: { 2090 codegen_->GenerateLongCompare(left, right); 2091 break; 2092 } 2093 case DataType::Type::kFloat32: { 2094 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); 2095 if (right.IsConstant()) { 2096 float value = right.GetConstant()->AsFloatConstant()->GetValue(); 2097 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value)); 2098 } else if (right.IsStackSlot()) { 2099 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); 2100 } else { 2101 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>()); 2102 } 2103 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 2104 less_cond = kBelow; // ucomis{s,d} sets CF 2105 break; 2106 } 2107 case DataType::Type::kFloat64: { 2108 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); 2109 if (right.IsConstant()) { 2110 double value = right.GetConstant()->AsDoubleConstant()->GetValue(); 2111 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value)); 2112 } else if (right.IsDoubleStackSlot()) { 2113 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); 2114 } else { 2115 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>()); 2116 } 2117 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 2118 less_cond = kBelow; // ucomis{s,d} sets CF 2119 break; 2120 } 2121 default: 2122 LOG(FATAL) << "Unexpected compare type " << type; 2123 } 2124 2125 __ movl(out, Immediate(0)); 2126 __ j(kEqual, &done); 2127 __ j(less_cond, &less); 2128 2129 __ Bind(&greater); 2130 __ movl(out, Immediate(1)); 2131 __ jmp(&done); 2132 2133 __ Bind(&less); 2134 __ movl(out, Immediate(-1)); 2135 2136 __ Bind(&done); 2137 } 2138 2139 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { 2140 LocationSummary* locations = 2141 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2142 locations->SetOut(Location::ConstantLocation(constant)); 2143 } 2144 2145 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 2146 // Will be generated at use site. 2147 } 2148 2149 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { 2150 LocationSummary* locations = 2151 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2152 locations->SetOut(Location::ConstantLocation(constant)); 2153 } 2154 2155 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 2156 // Will be generated at use site. 
2157 } 2158 2159 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { 2160 LocationSummary* locations = 2161 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2162 locations->SetOut(Location::ConstantLocation(constant)); 2163 } 2164 2165 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 2166 // Will be generated at use site. 2167 } 2168 2169 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { 2170 LocationSummary* locations = 2171 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2172 locations->SetOut(Location::ConstantLocation(constant)); 2173 } 2174 2175 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 2176 // Will be generated at use site. 2177 } 2178 2179 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { 2180 LocationSummary* locations = 2181 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2182 locations->SetOut(Location::ConstantLocation(constant)); 2183 } 2184 2185 void InstructionCodeGeneratorX86_64::VisitDoubleConstant( 2186 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 2187 // Will be generated at use site. 2188 } 2189 2190 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) { 2191 constructor_fence->SetLocations(nullptr); 2192 } 2193 2194 void InstructionCodeGeneratorX86_64::VisitConstructorFence( 2195 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 2196 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 2197 } 2198 2199 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2200 memory_barrier->SetLocations(nullptr); 2201 } 2202 2203 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2204 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 2205 } 2206 2207 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { 2208 ret->SetLocations(nullptr); 2209 } 2210 2211 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { 2212 codegen_->GenerateFrameExit(); 2213 } 2214 2215 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { 2216 LocationSummary* locations = 2217 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); 2218 switch (ret->InputAt(0)->GetType()) { 2219 case DataType::Type::kReference: 2220 case DataType::Type::kBool: 2221 case DataType::Type::kUint8: 2222 case DataType::Type::kInt8: 2223 case DataType::Type::kUint16: 2224 case DataType::Type::kInt16: 2225 case DataType::Type::kInt32: 2226 case DataType::Type::kInt64: 2227 locations->SetInAt(0, Location::RegisterLocation(RAX)); 2228 break; 2229 2230 case DataType::Type::kFloat32: 2231 case DataType::Type::kFloat64: 2232 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); 2233 break; 2234 2235 default: 2236 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); 2237 } 2238 } 2239 2240 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { 2241 if (kIsDebugBuild) { 2242 switch (ret->InputAt(0)->GetType()) { 2243 case DataType::Type::kReference: 2244 case DataType::Type::kBool: 2245 case DataType::Type::kUint8: 2246 case DataType::Type::kInt8: 2247 case DataType::Type::kUint16: 2248 case DataType::Type::kInt16: 2249 case DataType::Type::kInt32: 2250 case DataType::Type::kInt64: 2251 
DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX); 2252 break; 2253 2254 case DataType::Type::kFloat32: 2255 case DataType::Type::kFloat64: 2256 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), 2257 XMM0); 2258 break; 2259 2260 default: 2261 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); 2262 } 2263 } 2264 codegen_->GenerateFrameExit(); 2265 } 2266 2267 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const { 2268 switch (type) { 2269 case DataType::Type::kReference: 2270 case DataType::Type::kBool: 2271 case DataType::Type::kUint8: 2272 case DataType::Type::kInt8: 2273 case DataType::Type::kUint16: 2274 case DataType::Type::kInt16: 2275 case DataType::Type::kUint32: 2276 case DataType::Type::kInt32: 2277 case DataType::Type::kUint64: 2278 case DataType::Type::kInt64: 2279 return Location::RegisterLocation(RAX); 2280 2281 case DataType::Type::kVoid: 2282 return Location::NoLocation(); 2283 2284 case DataType::Type::kFloat64: 2285 case DataType::Type::kFloat32: 2286 return Location::FpuRegisterLocation(XMM0); 2287 } 2288 2289 UNREACHABLE(); 2290 } 2291 2292 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const { 2293 return Location::RegisterLocation(kMethodRegisterArgument); 2294 } 2295 2296 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) { 2297 switch (type) { 2298 case DataType::Type::kReference: 2299 case DataType::Type::kBool: 2300 case DataType::Type::kUint8: 2301 case DataType::Type::kInt8: 2302 case DataType::Type::kUint16: 2303 case DataType::Type::kInt16: 2304 case DataType::Type::kInt32: { 2305 uint32_t index = gp_index_++; 2306 stack_index_++; 2307 if (index < calling_convention.GetNumberOfRegisters()) { 2308 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 2309 } else { 2310 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 2311 } 2312 } 2313 2314 case DataType::Type::kInt64: { 2315 uint32_t index = gp_index_; 2316 stack_index_ += 2; 2317 if (index < calling_convention.GetNumberOfRegisters()) { 2318 gp_index_ += 1; 2319 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 2320 } else { 2321 gp_index_ += 2; 2322 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 2323 } 2324 } 2325 2326 case DataType::Type::kFloat32: { 2327 uint32_t index = float_index_++; 2328 stack_index_++; 2329 if (index < calling_convention.GetNumberOfFpuRegisters()) { 2330 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 2331 } else { 2332 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 2333 } 2334 } 2335 2336 case DataType::Type::kFloat64: { 2337 uint32_t index = float_index_++; 2338 stack_index_ += 2; 2339 if (index < calling_convention.GetNumberOfFpuRegisters()) { 2340 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 2341 } else { 2342 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 2343 } 2344 } 2345 2346 case DataType::Type::kUint32: 2347 case DataType::Type::kUint64: 2348 case DataType::Type::kVoid: 2349 LOG(FATAL) << "Unexpected parameter type " << type; 2350 break; 2351 } 2352 return Location::NoLocation(); 2353 } 2354 2355 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2356 // The trampoline uses the same calling 
convention as dex calling conventions, 2357 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 2358 // the method_idx. 2359 HandleInvoke(invoke); 2360 } 2361 2362 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2363 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 2364 } 2365 2366 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2367 // Explicit clinit checks triggered by static invokes must have been pruned by 2368 // art::PrepareForRegisterAllocation. 2369 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 2370 2371 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); 2372 if (intrinsic.TryDispatch(invoke)) { 2373 return; 2374 } 2375 2376 HandleInvoke(invoke); 2377 } 2378 2379 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) { 2380 if (invoke->GetLocations()->Intrinsified()) { 2381 IntrinsicCodeGeneratorX86_64 intrinsic(codegen); 2382 intrinsic.Dispatch(invoke); 2383 return true; 2384 } 2385 return false; 2386 } 2387 2388 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2389 // Explicit clinit checks triggered by static invokes must have been pruned by 2390 // art::PrepareForRegisterAllocation. 2391 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 2392 2393 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 2394 return; 2395 } 2396 2397 LocationSummary* locations = invoke->GetLocations(); 2398 codegen_->GenerateStaticOrDirectCall( 2399 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 2400 } 2401 2402 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { 2403 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor; 2404 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 2405 } 2406 2407 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 2408 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); 2409 if (intrinsic.TryDispatch(invoke)) { 2410 return; 2411 } 2412 2413 HandleInvoke(invoke); 2414 } 2415 2416 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 2417 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 2418 return; 2419 } 2420 2421 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 2422 DCHECK(!codegen_->IsLeafMethod()); 2423 } 2424 2425 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { 2426 HandleInvoke(invoke); 2427 // Add the hidden argument. 2428 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX)); 2429 } 2430 2431 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { 2432 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 2433 LocationSummary* locations = invoke->GetLocations(); 2434 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2435 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); 2436 Location receiver = locations->InAt(0); 2437 size_t class_offset = mirror::Object::ClassOffset().SizeValue(); 2438 2439 // Set the hidden argument. This is safe to do this here, as RAX 2440 // won't be modified thereafter, before the `call` instruction. 
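  // Illustrative sketch of the interface dispatch emitted below (IMT = interface method table):
  //   RAX  = dex method index                    // hidden argument; consumed by the IMT
  //                                              // conflict-resolution trampoline if needed
  //   temp = receiver->klass_->imt_[imt_index]   // two loads via ImtPtrOffset/OffsetOfElement
  //   call temp->GetEntryPoint()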
2441 DCHECK_EQ(RAX, hidden_reg.AsRegister()); 2442 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); 2443 2444 if (receiver.IsStackSlot()) { 2445 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); 2446 // /* HeapReference<Class> */ temp = temp->klass_ 2447 __ movl(temp, Address(temp, class_offset)); 2448 } else { 2449 // /* HeapReference<Class> */ temp = receiver->klass_ 2450 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); 2451 } 2452 codegen_->MaybeRecordImplicitNullCheck(invoke); 2453 // Instead of simply (possibly) unpoisoning `temp` here, we should 2454 // emit a read barrier for the previous class reference load. 2455 // However this is not required in practice, as this is an 2456 // intermediate/temporary reference and because the current 2457 // concurrent copying collector keeps the from-space memory 2458 // intact/accessible until the end of the marking phase (the 2459 // concurrent copying collector may not in the future). 2460 __ MaybeUnpoisonHeapReference(temp); 2461 // temp = temp->GetAddressOfIMT() 2462 __ movq(temp, 2463 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); 2464 // temp = temp->GetImtEntryAt(method_offset); 2465 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 2466 invoke->GetImtIndex(), kX86_64PointerSize)); 2467 // temp = temp->GetImtEntryAt(method_offset); 2468 __ movq(temp, Address(temp, method_offset)); 2469 // call temp->GetEntryPoint(); 2470 __ call(Address( 2471 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue())); 2472 2473 DCHECK(!codegen_->IsLeafMethod()); 2474 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 2475 } 2476 2477 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 2478 HandleInvoke(invoke); 2479 } 2480 2481 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 2482 codegen_->GenerateInvokePolymorphicCall(invoke); 2483 } 2484 2485 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) { 2486 LocationSummary* locations = 2487 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); 2488 switch (neg->GetResultType()) { 2489 case DataType::Type::kInt32: 2490 case DataType::Type::kInt64: 2491 locations->SetInAt(0, Location::RequiresRegister()); 2492 locations->SetOut(Location::SameAsFirstInput()); 2493 break; 2494 2495 case DataType::Type::kFloat32: 2496 case DataType::Type::kFloat64: 2497 locations->SetInAt(0, Location::RequiresFpuRegister()); 2498 locations->SetOut(Location::SameAsFirstInput()); 2499 locations->AddTemp(Location::RequiresFpuRegister()); 2500 break; 2501 2502 default: 2503 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 2504 } 2505 } 2506 2507 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { 2508 LocationSummary* locations = neg->GetLocations(); 2509 Location out = locations->Out(); 2510 Location in = locations->InAt(0); 2511 switch (neg->GetResultType()) { 2512 case DataType::Type::kInt32: 2513 DCHECK(in.IsRegister()); 2514 DCHECK(in.Equals(out)); 2515 __ negl(out.AsRegister<CpuRegister>()); 2516 break; 2517 2518 case DataType::Type::kInt64: 2519 DCHECK(in.IsRegister()); 2520 DCHECK(in.Equals(out)); 2521 __ negq(out.AsRegister<CpuRegister>()); 2522 break; 2523 2524 case DataType::Type::kFloat32: { 2525 DCHECK(in.Equals(out)); 2526 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2527 // Implement float negation with an exclusive or with value 2528 
// 0x80000000 (mask for bit 31, representing the sign of a 2529 // single-precision floating-point number). 2530 __ movss(mask, codegen_->LiteralInt32Address(0x80000000)); 2531 __ xorps(out.AsFpuRegister<XmmRegister>(), mask); 2532 break; 2533 } 2534 2535 case DataType::Type::kFloat64: { 2536 DCHECK(in.Equals(out)); 2537 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2538 // Implement double negation with an exclusive or with value 2539 // 0x8000000000000000 (mask for bit 63, representing the sign of 2540 // a double-precision floating-point number). 2541 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000))); 2542 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); 2543 break; 2544 } 2545 2546 default: 2547 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 2548 } 2549 } 2550 2551 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { 2552 LocationSummary* locations = 2553 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall); 2554 DataType::Type result_type = conversion->GetResultType(); 2555 DataType::Type input_type = conversion->GetInputType(); 2556 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 2557 << input_type << " -> " << result_type; 2558 2559 switch (result_type) { 2560 case DataType::Type::kUint8: 2561 case DataType::Type::kInt8: 2562 case DataType::Type::kUint16: 2563 case DataType::Type::kInt16: 2564 DCHECK(DataType::IsIntegralType(input_type)) << input_type; 2565 locations->SetInAt(0, Location::Any()); 2566 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2567 break; 2568 2569 case DataType::Type::kInt32: 2570 switch (input_type) { 2571 case DataType::Type::kInt64: 2572 locations->SetInAt(0, Location::Any()); 2573 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2574 break; 2575 2576 case DataType::Type::kFloat32: 2577 locations->SetInAt(0, Location::RequiresFpuRegister()); 2578 locations->SetOut(Location::RequiresRegister()); 2579 break; 2580 2581 case DataType::Type::kFloat64: 2582 locations->SetInAt(0, Location::RequiresFpuRegister()); 2583 locations->SetOut(Location::RequiresRegister()); 2584 break; 2585 2586 default: 2587 LOG(FATAL) << "Unexpected type conversion from " << input_type 2588 << " to " << result_type; 2589 } 2590 break; 2591 2592 case DataType::Type::kInt64: 2593 switch (input_type) { 2594 case DataType::Type::kBool: 2595 case DataType::Type::kUint8: 2596 case DataType::Type::kInt8: 2597 case DataType::Type::kUint16: 2598 case DataType::Type::kInt16: 2599 case DataType::Type::kInt32: 2600 // TODO: We would benefit from a (to-be-implemented) 2601 // Location::RegisterOrStackSlot requirement for this input. 
2602 locations->SetInAt(0, Location::RequiresRegister()); 2603 locations->SetOut(Location::RequiresRegister()); 2604 break; 2605 2606 case DataType::Type::kFloat32: 2607 locations->SetInAt(0, Location::RequiresFpuRegister()); 2608 locations->SetOut(Location::RequiresRegister()); 2609 break; 2610 2611 case DataType::Type::kFloat64: 2612 locations->SetInAt(0, Location::RequiresFpuRegister()); 2613 locations->SetOut(Location::RequiresRegister()); 2614 break; 2615 2616 default: 2617 LOG(FATAL) << "Unexpected type conversion from " << input_type 2618 << " to " << result_type; 2619 } 2620 break; 2621 2622 case DataType::Type::kFloat32: 2623 switch (input_type) { 2624 case DataType::Type::kBool: 2625 case DataType::Type::kUint8: 2626 case DataType::Type::kInt8: 2627 case DataType::Type::kUint16: 2628 case DataType::Type::kInt16: 2629 case DataType::Type::kInt32: 2630 locations->SetInAt(0, Location::Any()); 2631 locations->SetOut(Location::RequiresFpuRegister()); 2632 break; 2633 2634 case DataType::Type::kInt64: 2635 locations->SetInAt(0, Location::Any()); 2636 locations->SetOut(Location::RequiresFpuRegister()); 2637 break; 2638 2639 case DataType::Type::kFloat64: 2640 locations->SetInAt(0, Location::Any()); 2641 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2642 break; 2643 2644 default: 2645 LOG(FATAL) << "Unexpected type conversion from " << input_type 2646 << " to " << result_type; 2647 } 2648 break; 2649 2650 case DataType::Type::kFloat64: 2651 switch (input_type) { 2652 case DataType::Type::kBool: 2653 case DataType::Type::kUint8: 2654 case DataType::Type::kInt8: 2655 case DataType::Type::kUint16: 2656 case DataType::Type::kInt16: 2657 case DataType::Type::kInt32: 2658 locations->SetInAt(0, Location::Any()); 2659 locations->SetOut(Location::RequiresFpuRegister()); 2660 break; 2661 2662 case DataType::Type::kInt64: 2663 locations->SetInAt(0, Location::Any()); 2664 locations->SetOut(Location::RequiresFpuRegister()); 2665 break; 2666 2667 case DataType::Type::kFloat32: 2668 locations->SetInAt(0, Location::Any()); 2669 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2670 break; 2671 2672 default: 2673 LOG(FATAL) << "Unexpected type conversion from " << input_type 2674 << " to " << result_type; 2675 } 2676 break; 2677 2678 default: 2679 LOG(FATAL) << "Unexpected type conversion from " << input_type 2680 << " to " << result_type; 2681 } 2682 } 2683 2684 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) { 2685 LocationSummary* locations = conversion->GetLocations(); 2686 Location out = locations->Out(); 2687 Location in = locations->InAt(0); 2688 DataType::Type result_type = conversion->GetResultType(); 2689 DataType::Type input_type = conversion->GetInputType(); 2690 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 2691 << input_type << " -> " << result_type; 2692 switch (result_type) { 2693 case DataType::Type::kUint8: 2694 switch (input_type) { 2695 case DataType::Type::kInt8: 2696 case DataType::Type::kUint16: 2697 case DataType::Type::kInt16: 2698 case DataType::Type::kInt32: 2699 case DataType::Type::kInt64: 2700 if (in.IsRegister()) { 2701 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2702 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2703 __ movzxb(out.AsRegister<CpuRegister>(), 2704 Address(CpuRegister(RSP), in.GetStackIndex())); 2705 } else { 2706 __ movl(out.AsRegister<CpuRegister>(), 2707 
Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant())))); 2708 } 2709 break; 2710 2711 default: 2712 LOG(FATAL) << "Unexpected type conversion from " << input_type 2713 << " to " << result_type; 2714 } 2715 break; 2716 2717 case DataType::Type::kInt8: 2718 switch (input_type) { 2719 case DataType::Type::kUint8: 2720 case DataType::Type::kUint16: 2721 case DataType::Type::kInt16: 2722 case DataType::Type::kInt32: 2723 case DataType::Type::kInt64: 2724 if (in.IsRegister()) { 2725 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2726 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2727 __ movsxb(out.AsRegister<CpuRegister>(), 2728 Address(CpuRegister(RSP), in.GetStackIndex())); 2729 } else { 2730 __ movl(out.AsRegister<CpuRegister>(), 2731 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant())))); 2732 } 2733 break; 2734 2735 default: 2736 LOG(FATAL) << "Unexpected type conversion from " << input_type 2737 << " to " << result_type; 2738 } 2739 break; 2740 2741 case DataType::Type::kUint16: 2742 switch (input_type) { 2743 case DataType::Type::kInt8: 2744 case DataType::Type::kInt16: 2745 case DataType::Type::kInt32: 2746 case DataType::Type::kInt64: 2747 if (in.IsRegister()) { 2748 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2749 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2750 __ movzxw(out.AsRegister<CpuRegister>(), 2751 Address(CpuRegister(RSP), in.GetStackIndex())); 2752 } else { 2753 __ movl(out.AsRegister<CpuRegister>(), 2754 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant())))); 2755 } 2756 break; 2757 2758 default: 2759 LOG(FATAL) << "Unexpected type conversion from " << input_type 2760 << " to " << result_type; 2761 } 2762 break; 2763 2764 case DataType::Type::kInt16: 2765 switch (input_type) { 2766 case DataType::Type::kUint16: 2767 case DataType::Type::kInt32: 2768 case DataType::Type::kInt64: 2769 if (in.IsRegister()) { 2770 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2771 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2772 __ movsxw(out.AsRegister<CpuRegister>(), 2773 Address(CpuRegister(RSP), in.GetStackIndex())); 2774 } else { 2775 __ movl(out.AsRegister<CpuRegister>(), 2776 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant())))); 2777 } 2778 break; 2779 2780 default: 2781 LOG(FATAL) << "Unexpected type conversion from " << input_type 2782 << " to " << result_type; 2783 } 2784 break; 2785 2786 case DataType::Type::kInt32: 2787 switch (input_type) { 2788 case DataType::Type::kInt64: 2789 if (in.IsRegister()) { 2790 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2791 } else if (in.IsDoubleStackSlot()) { 2792 __ movl(out.AsRegister<CpuRegister>(), 2793 Address(CpuRegister(RSP), in.GetStackIndex())); 2794 } else { 2795 DCHECK(in.IsConstant()); 2796 DCHECK(in.GetConstant()->IsLongConstant()); 2797 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2798 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); 2799 } 2800 break; 2801 2802 case DataType::Type::kFloat32: { 2803 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2804 CpuRegister output = out.AsRegister<CpuRegister>(); 2805 NearLabel done, nan; 2806 2807 __ movl(output, Immediate(kPrimIntMax)); 2808 // if input >= (float)INT_MAX goto done 2809 __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax)); 2810 __ j(kAboveEqual, &done); 2811 // if input == NaN goto nan 2812 __ j(kUnordered, &nan); 2813 
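            // Illustrative note: together with the comiss/jump above, this sequence implements
            // the Java (int)float result:
            //   NaN               -> 0
            //   >= 2^31 as float  -> Integer.MAX_VALUE (kAboveEqual path)
            //   <  -2^31          -> Integer.MIN_VALUE (cvttss2si returns the 0x80000000
            //                        "integer indefinite" value on overflow)
            //   otherwise         -> truncation toward zero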
// output = float-to-int-truncate(input) 2814 __ cvttss2si(output, input, false); 2815 __ jmp(&done); 2816 __ Bind(&nan); 2817 // output = 0 2818 __ xorl(output, output); 2819 __ Bind(&done); 2820 break; 2821 } 2822 2823 case DataType::Type::kFloat64: { 2824 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2825 CpuRegister output = out.AsRegister<CpuRegister>(); 2826 NearLabel done, nan; 2827 2828 __ movl(output, Immediate(kPrimIntMax)); 2829 // if input >= (double)INT_MAX goto done 2830 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax)); 2831 __ j(kAboveEqual, &done); 2832 // if input == NaN goto nan 2833 __ j(kUnordered, &nan); 2834 // output = double-to-int-truncate(input) 2835 __ cvttsd2si(output, input); 2836 __ jmp(&done); 2837 __ Bind(&nan); 2838 // output = 0 2839 __ xorl(output, output); 2840 __ Bind(&done); 2841 break; 2842 } 2843 2844 default: 2845 LOG(FATAL) << "Unexpected type conversion from " << input_type 2846 << " to " << result_type; 2847 } 2848 break; 2849 2850 case DataType::Type::kInt64: 2851 switch (input_type) { 2852 DCHECK(out.IsRegister()); 2853 case DataType::Type::kBool: 2854 case DataType::Type::kUint8: 2855 case DataType::Type::kInt8: 2856 case DataType::Type::kUint16: 2857 case DataType::Type::kInt16: 2858 case DataType::Type::kInt32: 2859 DCHECK(in.IsRegister()); 2860 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2861 break; 2862 2863 case DataType::Type::kFloat32: { 2864 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2865 CpuRegister output = out.AsRegister<CpuRegister>(); 2866 NearLabel done, nan; 2867 2868 codegen_->Load64BitValue(output, kPrimLongMax); 2869 // if input >= (float)LONG_MAX goto done 2870 __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax)); 2871 __ j(kAboveEqual, &done); 2872 // if input == NaN goto nan 2873 __ j(kUnordered, &nan); 2874 // output = float-to-long-truncate(input) 2875 __ cvttss2si(output, input, true); 2876 __ jmp(&done); 2877 __ Bind(&nan); 2878 // output = 0 2879 __ xorl(output, output); 2880 __ Bind(&done); 2881 break; 2882 } 2883 2884 case DataType::Type::kFloat64: { 2885 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2886 CpuRegister output = out.AsRegister<CpuRegister>(); 2887 NearLabel done, nan; 2888 2889 codegen_->Load64BitValue(output, kPrimLongMax); 2890 // if input >= (double)LONG_MAX goto done 2891 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax)); 2892 __ j(kAboveEqual, &done); 2893 // if input == NaN goto nan 2894 __ j(kUnordered, &nan); 2895 // output = double-to-long-truncate(input) 2896 __ cvttsd2si(output, input, true); 2897 __ jmp(&done); 2898 __ Bind(&nan); 2899 // output = 0 2900 __ xorl(output, output); 2901 __ Bind(&done); 2902 break; 2903 } 2904 2905 default: 2906 LOG(FATAL) << "Unexpected type conversion from " << input_type 2907 << " to " << result_type; 2908 } 2909 break; 2910 2911 case DataType::Type::kFloat32: 2912 switch (input_type) { 2913 case DataType::Type::kBool: 2914 case DataType::Type::kUint8: 2915 case DataType::Type::kInt8: 2916 case DataType::Type::kUint16: 2917 case DataType::Type::kInt16: 2918 case DataType::Type::kInt32: 2919 if (in.IsRegister()) { 2920 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); 2921 } else if (in.IsConstant()) { 2922 int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); 2923 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 2924 codegen_->Load32BitValue(dest, static_cast<float>(v)); 2925 } else { 2926 __ 
cvtsi2ss(out.AsFpuRegister<XmmRegister>(), 2927 Address(CpuRegister(RSP), in.GetStackIndex()), false); 2928 } 2929 break; 2930 2931 case DataType::Type::kInt64: 2932 if (in.IsRegister()) { 2933 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); 2934 } else if (in.IsConstant()) { 2935 int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); 2936 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 2937 codegen_->Load32BitValue(dest, static_cast<float>(v)); 2938 } else { 2939 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), 2940 Address(CpuRegister(RSP), in.GetStackIndex()), true); 2941 } 2942 break; 2943 2944 case DataType::Type::kFloat64: 2945 if (in.IsFpuRegister()) { 2946 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); 2947 } else if (in.IsConstant()) { 2948 double v = in.GetConstant()->AsDoubleConstant()->GetValue(); 2949 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 2950 codegen_->Load32BitValue(dest, static_cast<float>(v)); 2951 } else { 2952 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), 2953 Address(CpuRegister(RSP), in.GetStackIndex())); 2954 } 2955 break; 2956 2957 default: 2958 LOG(FATAL) << "Unexpected type conversion from " << input_type 2959 << " to " << result_type; 2960 } 2961 break; 2962 2963 case DataType::Type::kFloat64: 2964 switch (input_type) { 2965 case DataType::Type::kBool: 2966 case DataType::Type::kUint8: 2967 case DataType::Type::kInt8: 2968 case DataType::Type::kUint16: 2969 case DataType::Type::kInt16: 2970 case DataType::Type::kInt32: 2971 if (in.IsRegister()) { 2972 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); 2973 } else if (in.IsConstant()) { 2974 int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); 2975 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 2976 codegen_->Load64BitValue(dest, static_cast<double>(v)); 2977 } else { 2978 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), 2979 Address(CpuRegister(RSP), in.GetStackIndex()), false); 2980 } 2981 break; 2982 2983 case DataType::Type::kInt64: 2984 if (in.IsRegister()) { 2985 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); 2986 } else if (in.IsConstant()) { 2987 int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); 2988 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 2989 codegen_->Load64BitValue(dest, static_cast<double>(v)); 2990 } else { 2991 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), 2992 Address(CpuRegister(RSP), in.GetStackIndex()), true); 2993 } 2994 break; 2995 2996 case DataType::Type::kFloat32: 2997 if (in.IsFpuRegister()) { 2998 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); 2999 } else if (in.IsConstant()) { 3000 float v = in.GetConstant()->AsFloatConstant()->GetValue(); 3001 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3002 codegen_->Load64BitValue(dest, static_cast<double>(v)); 3003 } else { 3004 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), 3005 Address(CpuRegister(RSP), in.GetStackIndex())); 3006 } 3007 break; 3008 3009 default: 3010 LOG(FATAL) << "Unexpected type conversion from " << input_type 3011 << " to " << result_type; 3012 } 3013 break; 3014 3015 default: 3016 LOG(FATAL) << "Unexpected type conversion from " << input_type 3017 << " to " << result_type; 3018 } 3019 } 3020 3021 void LocationsBuilderX86_64::VisitAdd(HAdd* add) { 3022 LocationSummary* locations = 3023 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall); 3024 switch (add->GetResultType()) { 
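    // Illustrative note: integral adds do not require the output to overlap the first input;
    // when the output register differs from both inputs, the generator below uses lea as a
    // non-destructive three-operand add, e.g.
    //   leal out, [first + second]    // instead of: movl out, first; addl out, second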
3025 case DataType::Type::kInt32: { 3026 locations->SetInAt(0, Location::RequiresRegister()); 3027 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); 3028 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3029 break; 3030 } 3031 3032 case DataType::Type::kInt64: { 3033 locations->SetInAt(0, Location::RequiresRegister()); 3034 // We can use a leaq or addq if the constant can fit in an immediate. 3035 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1))); 3036 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3037 break; 3038 } 3039 3040 case DataType::Type::kFloat64: 3041 case DataType::Type::kFloat32: { 3042 locations->SetInAt(0, Location::RequiresFpuRegister()); 3043 locations->SetInAt(1, Location::Any()); 3044 locations->SetOut(Location::SameAsFirstInput()); 3045 break; 3046 } 3047 3048 default: 3049 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 3050 } 3051 } 3052 3053 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { 3054 LocationSummary* locations = add->GetLocations(); 3055 Location first = locations->InAt(0); 3056 Location second = locations->InAt(1); 3057 Location out = locations->Out(); 3058 3059 switch (add->GetResultType()) { 3060 case DataType::Type::kInt32: { 3061 if (second.IsRegister()) { 3062 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3063 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3064 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) { 3065 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>()); 3066 } else { 3067 __ leal(out.AsRegister<CpuRegister>(), Address( 3068 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); 3069 } 3070 } else if (second.IsConstant()) { 3071 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3072 __ addl(out.AsRegister<CpuRegister>(), 3073 Immediate(second.GetConstant()->AsIntConstant()->GetValue())); 3074 } else { 3075 __ leal(out.AsRegister<CpuRegister>(), Address( 3076 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue())); 3077 } 3078 } else { 3079 DCHECK(first.Equals(locations->Out())); 3080 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); 3081 } 3082 break; 3083 } 3084 3085 case DataType::Type::kInt64: { 3086 if (second.IsRegister()) { 3087 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3088 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3089 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) { 3090 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>()); 3091 } else { 3092 __ leaq(out.AsRegister<CpuRegister>(), Address( 3093 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); 3094 } 3095 } else { 3096 DCHECK(second.IsConstant()); 3097 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3098 int32_t int32_value = Low32Bits(value); 3099 DCHECK_EQ(int32_value, value); 3100 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3101 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value)); 3102 } else { 3103 __ leaq(out.AsRegister<CpuRegister>(), Address( 3104 first.AsRegister<CpuRegister>(), int32_value)); 3105 } 3106 } 3107 break; 3108 } 3109 3110 case DataType::Type::kFloat32: { 3111 if (second.IsFpuRegister()) { 3112 __ addss(first.AsFpuRegister<XmmRegister>(), 
second.AsFpuRegister<XmmRegister>()); 3113 } else if (second.IsConstant()) { 3114 __ addss(first.AsFpuRegister<XmmRegister>(), 3115 codegen_->LiteralFloatAddress( 3116 second.GetConstant()->AsFloatConstant()->GetValue())); 3117 } else { 3118 DCHECK(second.IsStackSlot()); 3119 __ addss(first.AsFpuRegister<XmmRegister>(), 3120 Address(CpuRegister(RSP), second.GetStackIndex())); 3121 } 3122 break; 3123 } 3124 3125 case DataType::Type::kFloat64: { 3126 if (second.IsFpuRegister()) { 3127 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3128 } else if (second.IsConstant()) { 3129 __ addsd(first.AsFpuRegister<XmmRegister>(), 3130 codegen_->LiteralDoubleAddress( 3131 second.GetConstant()->AsDoubleConstant()->GetValue())); 3132 } else { 3133 DCHECK(second.IsDoubleStackSlot()); 3134 __ addsd(first.AsFpuRegister<XmmRegister>(), 3135 Address(CpuRegister(RSP), second.GetStackIndex())); 3136 } 3137 break; 3138 } 3139 3140 default: 3141 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 3142 } 3143 } 3144 3145 void LocationsBuilderX86_64::VisitSub(HSub* sub) { 3146 LocationSummary* locations = 3147 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall); 3148 switch (sub->GetResultType()) { 3149 case DataType::Type::kInt32: { 3150 locations->SetInAt(0, Location::RequiresRegister()); 3151 locations->SetInAt(1, Location::Any()); 3152 locations->SetOut(Location::SameAsFirstInput()); 3153 break; 3154 } 3155 case DataType::Type::kInt64: { 3156 locations->SetInAt(0, Location::RequiresRegister()); 3157 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1))); 3158 locations->SetOut(Location::SameAsFirstInput()); 3159 break; 3160 } 3161 case DataType::Type::kFloat32: 3162 case DataType::Type::kFloat64: { 3163 locations->SetInAt(0, Location::RequiresFpuRegister()); 3164 locations->SetInAt(1, Location::Any()); 3165 locations->SetOut(Location::SameAsFirstInput()); 3166 break; 3167 } 3168 default: 3169 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 3170 } 3171 } 3172 3173 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { 3174 LocationSummary* locations = sub->GetLocations(); 3175 Location first = locations->InAt(0); 3176 Location second = locations->InAt(1); 3177 DCHECK(first.Equals(locations->Out())); 3178 switch (sub->GetResultType()) { 3179 case DataType::Type::kInt32: { 3180 if (second.IsRegister()) { 3181 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3182 } else if (second.IsConstant()) { 3183 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); 3184 __ subl(first.AsRegister<CpuRegister>(), imm); 3185 } else { 3186 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); 3187 } 3188 break; 3189 } 3190 case DataType::Type::kInt64: { 3191 if (second.IsConstant()) { 3192 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3193 DCHECK(IsInt<32>(value)); 3194 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); 3195 } else { 3196 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3197 } 3198 break; 3199 } 3200 3201 case DataType::Type::kFloat32: { 3202 if (second.IsFpuRegister()) { 3203 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3204 } else if (second.IsConstant()) { 3205 __ subss(first.AsFpuRegister<XmmRegister>(), 3206 codegen_->LiteralFloatAddress( 3207 second.GetConstant()->AsFloatConstant()->GetValue())); 3208 } else 
{ 3209 DCHECK(second.IsStackSlot()); 3210 __ subss(first.AsFpuRegister<XmmRegister>(), 3211 Address(CpuRegister(RSP), second.GetStackIndex())); 3212 } 3213 break; 3214 } 3215 3216 case DataType::Type::kFloat64: { 3217 if (second.IsFpuRegister()) { 3218 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3219 } else if (second.IsConstant()) { 3220 __ subsd(first.AsFpuRegister<XmmRegister>(), 3221 codegen_->LiteralDoubleAddress( 3222 second.GetConstant()->AsDoubleConstant()->GetValue())); 3223 } else { 3224 DCHECK(second.IsDoubleStackSlot()); 3225 __ subsd(first.AsFpuRegister<XmmRegister>(), 3226 Address(CpuRegister(RSP), second.GetStackIndex())); 3227 } 3228 break; 3229 } 3230 3231 default: 3232 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 3233 } 3234 } 3235 3236 void LocationsBuilderX86_64::VisitMul(HMul* mul) { 3237 LocationSummary* locations = 3238 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); 3239 switch (mul->GetResultType()) { 3240 case DataType::Type::kInt32: { 3241 locations->SetInAt(0, Location::RequiresRegister()); 3242 locations->SetInAt(1, Location::Any()); 3243 if (mul->InputAt(1)->IsIntConstant()) { 3244 // Can use 3 operand multiply. 3245 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3246 } else { 3247 locations->SetOut(Location::SameAsFirstInput()); 3248 } 3249 break; 3250 } 3251 case DataType::Type::kInt64: { 3252 locations->SetInAt(0, Location::RequiresRegister()); 3253 locations->SetInAt(1, Location::Any()); 3254 if (mul->InputAt(1)->IsLongConstant() && 3255 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) { 3256 // Can use 3 operand multiply. 3257 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3258 } else { 3259 locations->SetOut(Location::SameAsFirstInput()); 3260 } 3261 break; 3262 } 3263 case DataType::Type::kFloat32: 3264 case DataType::Type::kFloat64: { 3265 locations->SetInAt(0, Location::RequiresFpuRegister()); 3266 locations->SetInAt(1, Location::Any()); 3267 locations->SetOut(Location::SameAsFirstInput()); 3268 break; 3269 } 3270 3271 default: 3272 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3273 } 3274 } 3275 3276 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { 3277 LocationSummary* locations = mul->GetLocations(); 3278 Location first = locations->InAt(0); 3279 Location second = locations->InAt(1); 3280 Location out = locations->Out(); 3281 switch (mul->GetResultType()) { 3282 case DataType::Type::kInt32: 3283 // The constant may have ended up in a register, so test explicitly to avoid 3284 // problems where the output may not be the same as the first operand. 3285 if (mul->InputAt(1)->IsIntConstant()) { 3286 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue()); 3287 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm); 3288 } else if (second.IsRegister()) { 3289 DCHECK(first.Equals(out)); 3290 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3291 } else { 3292 DCHECK(first.Equals(out)); 3293 DCHECK(second.IsStackSlot()); 3294 __ imull(first.AsRegister<CpuRegister>(), 3295 Address(CpuRegister(RSP), second.GetStackIndex())); 3296 } 3297 break; 3298 case DataType::Type::kInt64: { 3299 // The constant may have ended up in a register, so test explicitly to avoid 3300 // problems where the output may not be the same as the first operand. 
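// A constant that fits in 32 bits can use the three-operand imulq(dst, src, imm32),
// so the output need not overlap the first input. A full 64-bit constant has no
// immediate encoding and is read from the constant area with the two-operand
// imulq, which requires out == first (hence the DCHECK below).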
3301 if (mul->InputAt(1)->IsLongConstant()) { 3302 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue(); 3303 if (IsInt<32>(value)) { 3304 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), 3305 Immediate(static_cast<int32_t>(value))); 3306 } else { 3307 // Have to use the constant area. 3308 DCHECK(first.Equals(out)); 3309 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value)); 3310 } 3311 } else if (second.IsRegister()) { 3312 DCHECK(first.Equals(out)); 3313 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3314 } else { 3315 DCHECK(second.IsDoubleStackSlot()); 3316 DCHECK(first.Equals(out)); 3317 __ imulq(first.AsRegister<CpuRegister>(), 3318 Address(CpuRegister(RSP), second.GetStackIndex())); 3319 } 3320 break; 3321 } 3322 3323 case DataType::Type::kFloat32: { 3324 DCHECK(first.Equals(out)); 3325 if (second.IsFpuRegister()) { 3326 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3327 } else if (second.IsConstant()) { 3328 __ mulss(first.AsFpuRegister<XmmRegister>(), 3329 codegen_->LiteralFloatAddress( 3330 second.GetConstant()->AsFloatConstant()->GetValue())); 3331 } else { 3332 DCHECK(second.IsStackSlot()); 3333 __ mulss(first.AsFpuRegister<XmmRegister>(), 3334 Address(CpuRegister(RSP), second.GetStackIndex())); 3335 } 3336 break; 3337 } 3338 3339 case DataType::Type::kFloat64: { 3340 DCHECK(first.Equals(out)); 3341 if (second.IsFpuRegister()) { 3342 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3343 } else if (second.IsConstant()) { 3344 __ mulsd(first.AsFpuRegister<XmmRegister>(), 3345 codegen_->LiteralDoubleAddress( 3346 second.GetConstant()->AsDoubleConstant()->GetValue())); 3347 } else { 3348 DCHECK(second.IsDoubleStackSlot()); 3349 __ mulsd(first.AsFpuRegister<XmmRegister>(), 3350 Address(CpuRegister(RSP), second.GetStackIndex())); 3351 } 3352 break; 3353 } 3354 3355 default: 3356 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3357 } 3358 } 3359 3360 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset, 3361 uint32_t stack_adjustment, bool is_float) { 3362 if (source.IsStackSlot()) { 3363 DCHECK(is_float); 3364 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); 3365 } else if (source.IsDoubleStackSlot()) { 3366 DCHECK(!is_float); 3367 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); 3368 } else { 3369 // Write the value to the temporary location on the stack and load to FP stack. 3370 if (is_float) { 3371 Location stack_temp = Location::StackSlot(temp_offset); 3372 codegen_->Move(stack_temp, source); 3373 __ flds(Address(CpuRegister(RSP), temp_offset)); 3374 } else { 3375 Location stack_temp = Location::DoubleStackSlot(temp_offset); 3376 codegen_->Move(stack_temp, source); 3377 __ fldl(Address(CpuRegister(RSP), temp_offset)); 3378 } 3379 } 3380 } 3381 3382 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { 3383 DataType::Type type = rem->GetResultType(); 3384 bool is_float = type == DataType::Type::kFloat32; 3385 size_t elem_size = DataType::Size(type); 3386 LocationSummary* locations = rem->GetLocations(); 3387 Location first = locations->InAt(0); 3388 Location second = locations->InAt(1); 3389 Location out = locations->Out(); 3390 3391 // Create stack space for 2 elements. 3392 // TODO: enhance register allocator to ask for stack temporaries. 
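// Rough shape of what is emitted below (fprem only produces a partial
// remainder, so it has to be retried until the C2 status bit reads 0):
//   subq rsp, 2 * elem_size             // stack temporaries for both operands
//   fld second ; fld first              // ST(0) = dividend, ST(1) = divisor
//   retry: fprem ; fstsw ax ; andl eax, C2 ; jne retry
//   fst [rsp] ; fucompp                 // store the result, pop both x87 slots
//   movss/movsd out, [rsp] ; addq rsp, 2 * elem_size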
3393 __ subq(CpuRegister(RSP), Immediate(2 * elem_size)); 3394 3395 // Load the values to the FP stack in reverse order, using temporaries if needed. 3396 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); 3397 PushOntoFPStack(first, 0, 2 * elem_size, is_float); 3398 3399 // Loop doing FPREM until we stabilize. 3400 NearLabel retry; 3401 __ Bind(&retry); 3402 __ fprem(); 3403 3404 // Move FP status to AX. 3405 __ fstsw(); 3406 3407 // And see if the argument reduction is complete. This is signaled by the 3408 // C2 FPU flag bit set to 0. 3409 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask)); 3410 __ j(kNotEqual, &retry); 3411 3412 // We have settled on the final value. Retrieve it into an XMM register. 3413 // Store FP top of stack to real stack. 3414 if (is_float) { 3415 __ fsts(Address(CpuRegister(RSP), 0)); 3416 } else { 3417 __ fstl(Address(CpuRegister(RSP), 0)); 3418 } 3419 3420 // Pop the 2 items from the FP stack. 3421 __ fucompp(); 3422 3423 // Load the value from the stack into an XMM register. 3424 DCHECK(out.IsFpuRegister()) << out; 3425 if (is_float) { 3426 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); 3427 } else { 3428 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); 3429 } 3430 3431 // And remove the temporary stack space we allocated. 3432 __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); 3433 } 3434 3435 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { 3436 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3437 3438 LocationSummary* locations = instruction->GetLocations(); 3439 Location second = locations->InAt(1); 3440 DCHECK(second.IsConstant()); 3441 3442 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); 3443 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>(); 3444 int64_t imm = Int64FromConstant(second.GetConstant()); 3445 3446 DCHECK(imm == 1 || imm == -1); 3447 3448 switch (instruction->GetResultType()) { 3449 case DataType::Type::kInt32: { 3450 if (instruction->IsRem()) { 3451 __ xorl(output_register, output_register); 3452 } else { 3453 __ movl(output_register, input_register); 3454 if (imm == -1) { 3455 __ negl(output_register); 3456 } 3457 } 3458 break; 3459 } 3460 3461 case DataType::Type::kInt64: { 3462 if (instruction->IsRem()) { 3463 __ xorl(output_register, output_register); 3464 } else { 3465 __ movq(output_register, input_register); 3466 if (imm == -1) { 3467 __ negq(output_register); 3468 } 3469 } 3470 break; 3471 } 3472 3473 default: 3474 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType(); 3475 } 3476 } 3477 3478 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { 3479 LocationSummary* locations = instruction->GetLocations(); 3480 Location second = locations->InAt(1); 3481 3482 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); 3483 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); 3484 3485 int64_t imm = Int64FromConstant(second.GetConstant()); 3486 DCHECK(IsPowerOfTwo(AbsOrMin(imm))); 3487 uint64_t abs_imm = AbsOrMin(imm); 3488 3489 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); 3490 3491 if (instruction->GetResultType() == DataType::Type::kInt32) { 3492 __ leal(tmp, Address(numerator, abs_imm - 1)); 3493 __ testl(numerator, numerator); 3494 __ cmov(kGreaterEqual, tmp, numerator); 3495 int shift = CTZ(imm); 3496 __ sarl(tmp, Immediate(shift)); 3497 3498 if (imm < 0) { 3499 __ 
negl(tmp); 3500 } 3501 3502 __ movl(output_register, tmp); 3503 } else { 3504 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 3505 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); 3506 3507 codegen_->Load64BitValue(rdx, abs_imm - 1); 3508 __ addq(rdx, numerator); 3509 __ testq(numerator, numerator); 3510 __ cmov(kGreaterEqual, rdx, numerator); 3511 int shift = CTZ(imm); 3512 __ sarq(rdx, Immediate(shift)); 3513 3514 if (imm < 0) { 3515 __ negq(rdx); 3516 } 3517 3518 __ movq(output_register, rdx); 3519 } 3520 } 3521 3522 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { 3523 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3524 3525 LocationSummary* locations = instruction->GetLocations(); 3526 Location second = locations->InAt(1); 3527 3528 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>() 3529 : locations->GetTemp(0).AsRegister<CpuRegister>(); 3530 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>(); 3531 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>() 3532 : locations->Out().AsRegister<CpuRegister>(); 3533 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 3534 3535 DCHECK_EQ(RAX, eax.AsRegister()); 3536 DCHECK_EQ(RDX, edx.AsRegister()); 3537 if (instruction->IsDiv()) { 3538 DCHECK_EQ(RAX, out.AsRegister()); 3539 } else { 3540 DCHECK_EQ(RDX, out.AsRegister()); 3541 } 3542 3543 int64_t magic; 3544 int shift; 3545 3546 // TODO: can these branches be written as one? 3547 if (instruction->GetResultType() == DataType::Type::kInt32) { 3548 int imm = second.GetConstant()->AsIntConstant()->GetValue(); 3549 3550 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); 3551 3552 __ movl(numerator, eax); 3553 3554 __ movl(eax, Immediate(magic)); 3555 __ imull(numerator); 3556 3557 if (imm > 0 && magic < 0) { 3558 __ addl(edx, numerator); 3559 } else if (imm < 0 && magic > 0) { 3560 __ subl(edx, numerator); 3561 } 3562 3563 if (shift != 0) { 3564 __ sarl(edx, Immediate(shift)); 3565 } 3566 3567 __ movl(eax, edx); 3568 __ shrl(edx, Immediate(31)); 3569 __ addl(edx, eax); 3570 3571 if (instruction->IsRem()) { 3572 __ movl(eax, numerator); 3573 __ imull(edx, Immediate(imm)); 3574 __ subl(eax, edx); 3575 __ movl(edx, eax); 3576 } else { 3577 __ movl(eax, edx); 3578 } 3579 } else { 3580 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue(); 3581 3582 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 3583 3584 CpuRegister rax = eax; 3585 CpuRegister rdx = edx; 3586 3587 CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift); 3588 3589 // Save the numerator. 3590 __ movq(numerator, rax); 3591 3592 // RAX = magic 3593 codegen_->Load64BitValue(rax, magic); 3594 3595 // RDX:RAX = magic * numerator 3596 __ imulq(numerator); 3597 3598 if (imm > 0 && magic < 0) { 3599 // RDX += numerator 3600 __ addq(rdx, numerator); 3601 } else if (imm < 0 && magic > 0) { 3602 // RDX -= numerator 3603 __ subq(rdx, numerator); 3604 } 3605 3606 // Shift if needed. 
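// At this point RDX holds the high 64 bits of magic * numerator, already
// corrected by +/- numerator where the signs of the divisor and of the magic
// constant differ. The arithmetic shift and the "+1 if negative" step below
// round the quotient toward zero, as in the usual magic-constant signed
// division scheme (cf. Hacker's Delight).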
3607 if (shift != 0) {
3608 __ sarq(rdx, Immediate(shift));
3609 }
3610
3611 // RDX += 1 if RDX < 0
3612 __ movq(rax, rdx);
3613 __ shrq(rdx, Immediate(63));
3614 __ addq(rdx, rax);
3615
3616 if (instruction->IsRem()) {
3617 __ movq(rax, numerator);
3618
3619 if (IsInt<32>(imm)) {
3620 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3621 } else {
3622 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3623 }
3624
3625 __ subq(rax, rdx);
3626 __ movq(rdx, rax);
3627 } else {
3628 __ movq(rax, rdx);
3629 }
3630 }
3631 }
3632
3633 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3634 DCHECK(instruction->IsDiv() || instruction->IsRem());
3635 DataType::Type type = instruction->GetResultType();
3636 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3637
3638 bool is_div = instruction->IsDiv();
3639 LocationSummary* locations = instruction->GetLocations();
3640
3641 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3642 Location second = locations->InAt(1);
3643
3644 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3645 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3646
3647 if (second.IsConstant()) {
3648 int64_t imm = Int64FromConstant(second.GetConstant());
3649
3650 if (imm == 0) {
3651 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3652 } else if (imm == 1 || imm == -1) {
3653 DivRemOneOrMinusOne(instruction);
3654 } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3655 DivByPowerOfTwo(instruction->AsDiv());
3656 } else {
3657 DCHECK(imm <= -2 || imm >= 2);
3658 GenerateDivRemWithAnyConstant(instruction);
3659 }
3660 } else {
3661 SlowPathCode* slow_path =
3662 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
3663 instruction, out.AsRegister(), type, is_div);
3664 codegen_->AddSlowPath(slow_path);
3665
3666 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3667 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3668 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3669 // so it's safe to just use negl instead of more complex comparisons.
3670 if (type == DataType::Type::kInt32) {
3671 __ cmpl(second_reg, Immediate(-1));
3672 __ j(kEqual, slow_path->GetEntryLabel());
3673 // edx:eax <- sign extension of eax
3674 __ cdq();
3675 // eax = quotient, edx = remainder
3676 __ idivl(second_reg);
3677 } else {
3678 __ cmpq(second_reg, Immediate(-1));
3679 __ j(kEqual, slow_path->GetEntryLabel());
3680 // rdx:rax <- sign extension of rax
3681 __ cqo();
3682 // rax = quotient, rdx = remainder
3683 __ idivq(second_reg);
3684 }
3685 __ Bind(slow_path->GetExitLabel());
3686 }
3687 }
3688
3689 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3690 LocationSummary* locations =
3691 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3692 switch (div->GetResultType()) {
3693 case DataType::Type::kInt32:
3694 case DataType::Type::kInt64: {
3695 locations->SetInAt(0, Location::RegisterLocation(RAX));
3696 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3697 locations->SetOut(Location::SameAsFirstInput());
3698 // Intel uses edx:eax as the dividend.
3699 locations->AddTemp(Location::RegisterLocation(RDX));
3700 // We need to save the numerator while we tweak rax and rdx.
As we are using imul in a way 3701 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as 3702 // output and request another temp. 3703 if (div->InputAt(1)->IsConstant()) { 3704 locations->AddTemp(Location::RequiresRegister()); 3705 } 3706 break; 3707 } 3708 3709 case DataType::Type::kFloat32: 3710 case DataType::Type::kFloat64: { 3711 locations->SetInAt(0, Location::RequiresFpuRegister()); 3712 locations->SetInAt(1, Location::Any()); 3713 locations->SetOut(Location::SameAsFirstInput()); 3714 break; 3715 } 3716 3717 default: 3718 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3719 } 3720 } 3721 3722 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { 3723 LocationSummary* locations = div->GetLocations(); 3724 Location first = locations->InAt(0); 3725 Location second = locations->InAt(1); 3726 DCHECK(first.Equals(locations->Out())); 3727 3728 DataType::Type type = div->GetResultType(); 3729 switch (type) { 3730 case DataType::Type::kInt32: 3731 case DataType::Type::kInt64: { 3732 GenerateDivRemIntegral(div); 3733 break; 3734 } 3735 3736 case DataType::Type::kFloat32: { 3737 if (second.IsFpuRegister()) { 3738 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3739 } else if (second.IsConstant()) { 3740 __ divss(first.AsFpuRegister<XmmRegister>(), 3741 codegen_->LiteralFloatAddress( 3742 second.GetConstant()->AsFloatConstant()->GetValue())); 3743 } else { 3744 DCHECK(second.IsStackSlot()); 3745 __ divss(first.AsFpuRegister<XmmRegister>(), 3746 Address(CpuRegister(RSP), second.GetStackIndex())); 3747 } 3748 break; 3749 } 3750 3751 case DataType::Type::kFloat64: { 3752 if (second.IsFpuRegister()) { 3753 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3754 } else if (second.IsConstant()) { 3755 __ divsd(first.AsFpuRegister<XmmRegister>(), 3756 codegen_->LiteralDoubleAddress( 3757 second.GetConstant()->AsDoubleConstant()->GetValue())); 3758 } else { 3759 DCHECK(second.IsDoubleStackSlot()); 3760 __ divsd(first.AsFpuRegister<XmmRegister>(), 3761 Address(CpuRegister(RSP), second.GetStackIndex())); 3762 } 3763 break; 3764 } 3765 3766 default: 3767 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3768 } 3769 } 3770 3771 void LocationsBuilderX86_64::VisitRem(HRem* rem) { 3772 DataType::Type type = rem->GetResultType(); 3773 LocationSummary* locations = 3774 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall); 3775 3776 switch (type) { 3777 case DataType::Type::kInt32: 3778 case DataType::Type::kInt64: { 3779 locations->SetInAt(0, Location::RegisterLocation(RAX)); 3780 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); 3781 // Intel uses rdx:rax as the dividend and puts the remainder in rdx 3782 locations->SetOut(Location::RegisterLocation(RDX)); 3783 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way 3784 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as 3785 // output and request another temp. 
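// The extra temporary below is only needed on the constant-divisor path
// (GenerateDivRemWithAnyConstant), which has to keep a copy of the numerator
// alive across an imul sequence that clobbers both RAX and RDX.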
3786 if (rem->InputAt(1)->IsConstant()) { 3787 locations->AddTemp(Location::RequiresRegister()); 3788 } 3789 break; 3790 } 3791 3792 case DataType::Type::kFloat32: 3793 case DataType::Type::kFloat64: { 3794 locations->SetInAt(0, Location::Any()); 3795 locations->SetInAt(1, Location::Any()); 3796 locations->SetOut(Location::RequiresFpuRegister()); 3797 locations->AddTemp(Location::RegisterLocation(RAX)); 3798 break; 3799 } 3800 3801 default: 3802 LOG(FATAL) << "Unexpected rem type " << type; 3803 } 3804 } 3805 3806 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { 3807 DataType::Type type = rem->GetResultType(); 3808 switch (type) { 3809 case DataType::Type::kInt32: 3810 case DataType::Type::kInt64: { 3811 GenerateDivRemIntegral(rem); 3812 break; 3813 } 3814 case DataType::Type::kFloat32: 3815 case DataType::Type::kFloat64: { 3816 GenerateRemFP(rem); 3817 break; 3818 } 3819 default: 3820 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); 3821 } 3822 } 3823 3824 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 3825 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 3826 locations->SetInAt(0, Location::Any()); 3827 } 3828 3829 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 3830 SlowPathCode* slow_path = 3831 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction); 3832 codegen_->AddSlowPath(slow_path); 3833 3834 LocationSummary* locations = instruction->GetLocations(); 3835 Location value = locations->InAt(0); 3836 3837 switch (instruction->GetType()) { 3838 case DataType::Type::kBool: 3839 case DataType::Type::kUint8: 3840 case DataType::Type::kInt8: 3841 case DataType::Type::kUint16: 3842 case DataType::Type::kInt16: 3843 case DataType::Type::kInt32: { 3844 if (value.IsRegister()) { 3845 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); 3846 __ j(kEqual, slow_path->GetEntryLabel()); 3847 } else if (value.IsStackSlot()) { 3848 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0)); 3849 __ j(kEqual, slow_path->GetEntryLabel()); 3850 } else { 3851 DCHECK(value.IsConstant()) << value; 3852 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) { 3853 __ jmp(slow_path->GetEntryLabel()); 3854 } 3855 } 3856 break; 3857 } 3858 case DataType::Type::kInt64: { 3859 if (value.IsRegister()) { 3860 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); 3861 __ j(kEqual, slow_path->GetEntryLabel()); 3862 } else if (value.IsDoubleStackSlot()) { 3863 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0)); 3864 __ j(kEqual, slow_path->GetEntryLabel()); 3865 } else { 3866 DCHECK(value.IsConstant()) << value; 3867 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) { 3868 __ jmp(slow_path->GetEntryLabel()); 3869 } 3870 } 3871 break; 3872 } 3873 default: 3874 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType(); 3875 } 3876 } 3877 3878 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) { 3879 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 3880 3881 LocationSummary* locations = 3882 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); 3883 3884 switch (op->GetResultType()) { 3885 case DataType::Type::kInt32: 3886 case DataType::Type::kInt64: { 3887 locations->SetInAt(0, Location::RequiresRegister()); 3888 // The shift count needs to be in CL. 
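// x86-64 variable shifts only take their count in CL, so a non-constant shift
// amount is pinned to RCX; constant amounts are emitted as immediates and
// masked to the operand width in the generator below.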
3889 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1))); 3890 locations->SetOut(Location::SameAsFirstInput()); 3891 break; 3892 } 3893 default: 3894 LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); 3895 } 3896 } 3897 3898 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { 3899 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 3900 3901 LocationSummary* locations = op->GetLocations(); 3902 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); 3903 Location second = locations->InAt(1); 3904 3905 switch (op->GetResultType()) { 3906 case DataType::Type::kInt32: { 3907 if (second.IsRegister()) { 3908 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 3909 if (op->IsShl()) { 3910 __ shll(first_reg, second_reg); 3911 } else if (op->IsShr()) { 3912 __ sarl(first_reg, second_reg); 3913 } else { 3914 __ shrl(first_reg, second_reg); 3915 } 3916 } else { 3917 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance); 3918 if (op->IsShl()) { 3919 __ shll(first_reg, imm); 3920 } else if (op->IsShr()) { 3921 __ sarl(first_reg, imm); 3922 } else { 3923 __ shrl(first_reg, imm); 3924 } 3925 } 3926 break; 3927 } 3928 case DataType::Type::kInt64: { 3929 if (second.IsRegister()) { 3930 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 3931 if (op->IsShl()) { 3932 __ shlq(first_reg, second_reg); 3933 } else if (op->IsShr()) { 3934 __ sarq(first_reg, second_reg); 3935 } else { 3936 __ shrq(first_reg, second_reg); 3937 } 3938 } else { 3939 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance); 3940 if (op->IsShl()) { 3941 __ shlq(first_reg, imm); 3942 } else if (op->IsShr()) { 3943 __ sarq(first_reg, imm); 3944 } else { 3945 __ shrq(first_reg, imm); 3946 } 3947 } 3948 break; 3949 } 3950 default: 3951 LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); 3952 UNREACHABLE(); 3953 } 3954 } 3955 3956 void LocationsBuilderX86_64::VisitRor(HRor* ror) { 3957 LocationSummary* locations = 3958 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); 3959 3960 switch (ror->GetResultType()) { 3961 case DataType::Type::kInt32: 3962 case DataType::Type::kInt64: { 3963 locations->SetInAt(0, Location::RequiresRegister()); 3964 // The shift count needs to be in CL (unless it is a constant). 
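// Same constraint as for the shifts above: a non-constant rotate distance must
// be in CL. The hardware only uses the low 5 (32-bit) or 6 (64-bit) bits of the
// count, which matches the kMaxIntShiftDistance/kMaxLongShiftDistance masking
// applied to constant distances in VisitRor below.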
3965 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3966 locations->SetOut(Location::SameAsFirstInput());
3967 break;
3968 }
3969 default:
3970 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3971 UNREACHABLE();
3972 }
3973 }
3974
3975 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3976 LocationSummary* locations = ror->GetLocations();
3977 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3978 Location second = locations->InAt(1);
3979
3980 switch (ror->GetResultType()) {
3981 case DataType::Type::kInt32:
3982 if (second.IsRegister()) {
3983 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3984 __ rorl(first_reg, second_reg);
3985 } else {
3986 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3987 __ rorl(first_reg, imm);
3988 }
3989 break;
3990 case DataType::Type::kInt64:
3991 if (second.IsRegister()) {
3992 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3993 __ rorq(first_reg, second_reg);
3994 } else {
3995 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3996 __ rorq(first_reg, imm);
3997 }
3998 break;
3999 default:
4000 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4001 UNREACHABLE();
4002 }
4003 }
4004
4005 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4006 HandleShift(shl);
4007 }
4008
4009 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4010 HandleShift(shl);
4011 }
4012
4013 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4014 HandleShift(shr);
4015 }
4016
4017 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4018 HandleShift(shr);
4019 }
4020
4021 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4022 HandleShift(ushr);
4023 }
4024
4025 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4026 HandleShift(ushr);
4027 }
4028
4029 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4030 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4031 instruction, LocationSummary::kCallOnMainOnly);
4032 InvokeRuntimeCallingConvention calling_convention;
4033 if (instruction->IsStringAlloc()) {
4034 locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
4035 } else {
4036 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4037 }
4038 locations->SetOut(Location::RegisterLocation(RAX));
4039 }
4040
4041 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4042 // Note: if heap poisoning is enabled, the entry point takes care
4043 // of poisoning the reference.
4044 if (instruction->IsStringAlloc()) {
4045 // String is allocated through StringFactory. Call NewEmptyString entry point.
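// The pNewEmptyString slot is a thread-local entrypoint, hence the %gs-relative
// absolute address below; it holds the ArtMethod to invoke, and the call goes
// through that method's quick-compiled code pointer.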
4046 CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
4047 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
4048 __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
4049 __ call(Address(temp, code_offset.SizeValue()));
4050 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4051 } else {
4052 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4053 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4054 DCHECK(!codegen_->IsLeafMethod());
4055 }
4056 }
4057
4058 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4059 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4060 instruction, LocationSummary::kCallOnMainOnly);
4061 InvokeRuntimeCallingConvention calling_convention;
4062 locations->SetOut(Location::RegisterLocation(RAX));
4063 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4064 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4065 }
4066
4067 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4068 // Note: if heap poisoning is enabled, the entry point takes care
4069 // of poisoning the reference.
4070 QuickEntrypointEnum entrypoint =
4071 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
4072 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4073 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4074 DCHECK(!codegen_->IsLeafMethod());
4075 }
4076
4077 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4078 LocationSummary* locations =
4079 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4080 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4081 if (location.IsStackSlot()) {
4082 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4083 } else if (location.IsDoubleStackSlot()) {
4084 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4085 }
4086 locations->SetOut(location);
4087 }
4088
4089 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4090 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4091 // Nothing to do, the parameter is already at its location.
4092 }
4093
4094 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4095 LocationSummary* locations =
4096 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4097 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4098 }
4099
4100 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4101 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4102 // Nothing to do, the method is already at its location.
4103 } 4104 4105 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) { 4106 LocationSummary* locations = 4107 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4108 locations->SetInAt(0, Location::RequiresRegister()); 4109 locations->SetOut(Location::RequiresRegister()); 4110 } 4111 4112 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) { 4113 LocationSummary* locations = instruction->GetLocations(); 4114 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { 4115 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 4116 instruction->GetIndex(), kX86_64PointerSize).SizeValue(); 4117 __ movq(locations->Out().AsRegister<CpuRegister>(), 4118 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset)); 4119 } else { 4120 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 4121 instruction->GetIndex(), kX86_64PointerSize)); 4122 __ movq(locations->Out().AsRegister<CpuRegister>(), 4123 Address(locations->InAt(0).AsRegister<CpuRegister>(), 4124 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); 4125 __ movq(locations->Out().AsRegister<CpuRegister>(), 4126 Address(locations->Out().AsRegister<CpuRegister>(), method_offset)); 4127 } 4128 } 4129 4130 void LocationsBuilderX86_64::VisitNot(HNot* not_) { 4131 LocationSummary* locations = 4132 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall); 4133 locations->SetInAt(0, Location::RequiresRegister()); 4134 locations->SetOut(Location::SameAsFirstInput()); 4135 } 4136 4137 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) { 4138 LocationSummary* locations = not_->GetLocations(); 4139 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(), 4140 locations->Out().AsRegister<CpuRegister>().AsRegister()); 4141 Location out = locations->Out(); 4142 switch (not_->GetResultType()) { 4143 case DataType::Type::kInt32: 4144 __ notl(out.AsRegister<CpuRegister>()); 4145 break; 4146 4147 case DataType::Type::kInt64: 4148 __ notq(out.AsRegister<CpuRegister>()); 4149 break; 4150 4151 default: 4152 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType(); 4153 } 4154 } 4155 4156 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) { 4157 LocationSummary* locations = 4158 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall); 4159 locations->SetInAt(0, Location::RequiresRegister()); 4160 locations->SetOut(Location::SameAsFirstInput()); 4161 } 4162 4163 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) { 4164 LocationSummary* locations = bool_not->GetLocations(); 4165 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(), 4166 locations->Out().AsRegister<CpuRegister>().AsRegister()); 4167 Location out = locations->Out(); 4168 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1)); 4169 } 4170 4171 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { 4172 LocationSummary* locations = 4173 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4174 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { 4175 locations->SetInAt(i, Location::Any()); 4176 } 4177 locations->SetOut(Location::Any()); 4178 } 4179 4180 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 4181 LOG(FATAL) << "Unimplemented"; 4182 } 4183 4184 void 
CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { 4185 /* 4186 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence. 4187 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model. 4188 * For those cases, all we need to ensure is that there is a scheduling barrier in place. 4189 */ 4190 switch (kind) { 4191 case MemBarrierKind::kAnyAny: { 4192 MemoryFence(); 4193 break; 4194 } 4195 case MemBarrierKind::kAnyStore: 4196 case MemBarrierKind::kLoadAny: 4197 case MemBarrierKind::kStoreStore: { 4198 // nop 4199 break; 4200 } 4201 case MemBarrierKind::kNTStoreStore: 4202 // Non-Temporal Store/Store needs an explicit fence. 4203 MemoryFence(/* non-temporal */ true); 4204 break; 4205 } 4206 } 4207 4208 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { 4209 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 4210 4211 bool object_field_get_with_read_barrier = 4212 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 4213 LocationSummary* locations = 4214 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 4215 object_field_get_with_read_barrier 4216 ? LocationSummary::kCallOnSlowPath 4217 : LocationSummary::kNoCall); 4218 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 4219 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 4220 } 4221 locations->SetInAt(0, Location::RequiresRegister()); 4222 if (DataType::IsFloatingPointType(instruction->GetType())) { 4223 locations->SetOut(Location::RequiresFpuRegister()); 4224 } else { 4225 // The output overlaps for an object field get when read barriers 4226 // are enabled: we do not want the move to overwrite the object's 4227 // location, as we need it to emit the read barrier. 4228 locations->SetOut( 4229 Location::RequiresRegister(), 4230 object_field_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 4231 } 4232 } 4233 4234 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, 4235 const FieldInfo& field_info) { 4236 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 4237 4238 LocationSummary* locations = instruction->GetLocations(); 4239 Location base_loc = locations->InAt(0); 4240 CpuRegister base = base_loc.AsRegister<CpuRegister>(); 4241 Location out = locations->Out(); 4242 bool is_volatile = field_info.IsVolatile(); 4243 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); 4244 DataType::Type load_type = instruction->GetType(); 4245 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 4246 4247 switch (load_type) { 4248 case DataType::Type::kBool: 4249 case DataType::Type::kUint8: { 4250 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset)); 4251 break; 4252 } 4253 4254 case DataType::Type::kInt8: { 4255 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset)); 4256 break; 4257 } 4258 4259 case DataType::Type::kUint16: { 4260 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); 4261 break; 4262 } 4263 4264 case DataType::Type::kInt16: { 4265 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); 4266 break; 4267 } 4268 4269 case DataType::Type::kInt32: { 4270 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); 4271 break; 4272 } 4273 4274 case DataType::Type::kReference: { 4275 // /* HeapReference<Object> */ out = *(base + offset) 4276 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 4277 // Note that a potential implicit null check is handled in this 4278 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. 4279 codegen_->GenerateFieldLoadWithBakerReadBarrier( 4280 instruction, out, base, offset, /* needs_null_check */ true); 4281 if (is_volatile) { 4282 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4283 } 4284 } else { 4285 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); 4286 codegen_->MaybeRecordImplicitNullCheck(instruction); 4287 if (is_volatile) { 4288 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4289 } 4290 // If read barriers are enabled, emit read barriers other than 4291 // Baker's using a slow path (and also unpoison the loaded 4292 // reference, if heap poisoning is enabled). 4293 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); 4294 } 4295 break; 4296 } 4297 4298 case DataType::Type::kInt64: { 4299 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); 4300 break; 4301 } 4302 4303 case DataType::Type::kFloat32: { 4304 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); 4305 break; 4306 } 4307 4308 case DataType::Type::kFloat64: { 4309 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); 4310 break; 4311 } 4312 4313 case DataType::Type::kUint32: 4314 case DataType::Type::kUint64: 4315 case DataType::Type::kVoid: 4316 LOG(FATAL) << "Unreachable type " << load_type; 4317 UNREACHABLE(); 4318 } 4319 4320 if (load_type == DataType::Type::kReference) { 4321 // Potential implicit null checks, in the case of reference 4322 // fields, are handled in the previous switch statement. 
4323 } else { 4324 codegen_->MaybeRecordImplicitNullCheck(instruction); 4325 } 4326 4327 if (is_volatile) { 4328 if (load_type == DataType::Type::kReference) { 4329 // Memory barriers, in the case of references, are also handled 4330 // in the previous switch statement. 4331 } else { 4332 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4333 } 4334 } 4335 } 4336 4337 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, 4338 const FieldInfo& field_info) { 4339 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 4340 4341 LocationSummary* locations = 4342 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4343 DataType::Type field_type = field_info.GetFieldType(); 4344 bool is_volatile = field_info.IsVolatile(); 4345 bool needs_write_barrier = 4346 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); 4347 4348 locations->SetInAt(0, Location::RequiresRegister()); 4349 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { 4350 if (is_volatile) { 4351 // In order to satisfy the semantics of volatile, this must be a single instruction store. 4352 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1))); 4353 } else { 4354 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); 4355 } 4356 } else { 4357 if (is_volatile) { 4358 // In order to satisfy the semantics of volatile, this must be a single instruction store. 4359 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1))); 4360 } else { 4361 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 4362 } 4363 } 4364 if (needs_write_barrier) { 4365 // Temporary registers for the write barrier. 4366 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 4367 locations->AddTemp(Location::RequiresRegister()); 4368 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 4369 // Temporary register for the reference poisoning. 
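// The temp keeps the value register intact: HandleFieldSet below copies the
// reference into it, poisons the copy, and stores the poisoned copy.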
4370 locations->AddTemp(Location::RequiresRegister()); 4371 } 4372 } 4373 4374 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, 4375 const FieldInfo& field_info, 4376 bool value_can_be_null) { 4377 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 4378 4379 LocationSummary* locations = instruction->GetLocations(); 4380 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); 4381 Location value = locations->InAt(1); 4382 bool is_volatile = field_info.IsVolatile(); 4383 DataType::Type field_type = field_info.GetFieldType(); 4384 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 4385 4386 if (is_volatile) { 4387 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); 4388 } 4389 4390 bool maybe_record_implicit_null_check_done = false; 4391 4392 switch (field_type) { 4393 case DataType::Type::kBool: 4394 case DataType::Type::kUint8: 4395 case DataType::Type::kInt8: { 4396 if (value.IsConstant()) { 4397 __ movb(Address(base, offset), 4398 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); 4399 } else { 4400 __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); 4401 } 4402 break; 4403 } 4404 4405 case DataType::Type::kUint16: 4406 case DataType::Type::kInt16: { 4407 if (value.IsConstant()) { 4408 __ movw(Address(base, offset), 4409 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); 4410 } else { 4411 __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); 4412 } 4413 break; 4414 } 4415 4416 case DataType::Type::kInt32: 4417 case DataType::Type::kReference: { 4418 if (value.IsConstant()) { 4419 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 4420 // `field_type == DataType::Type::kReference` implies `v == 0`. 4421 DCHECK((field_type != DataType::Type::kReference) || (v == 0)); 4422 // Note: if heap poisoning is enabled, no need to poison 4423 // (negate) `v` if it is a reference, as it would be null. 
4424 __ movl(Address(base, offset), Immediate(v)); 4425 } else { 4426 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 4427 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 4428 __ movl(temp, value.AsRegister<CpuRegister>()); 4429 __ PoisonHeapReference(temp); 4430 __ movl(Address(base, offset), temp); 4431 } else { 4432 __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); 4433 } 4434 } 4435 break; 4436 } 4437 4438 case DataType::Type::kInt64: { 4439 if (value.IsConstant()) { 4440 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 4441 codegen_->MoveInt64ToAddress(Address(base, offset), 4442 Address(base, offset + sizeof(int32_t)), 4443 v, 4444 instruction); 4445 maybe_record_implicit_null_check_done = true; 4446 } else { 4447 __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); 4448 } 4449 break; 4450 } 4451 4452 case DataType::Type::kFloat32: { 4453 if (value.IsConstant()) { 4454 int32_t v = 4455 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); 4456 __ movl(Address(base, offset), Immediate(v)); 4457 } else { 4458 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); 4459 } 4460 break; 4461 } 4462 4463 case DataType::Type::kFloat64: { 4464 if (value.IsConstant()) { 4465 int64_t v = 4466 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); 4467 codegen_->MoveInt64ToAddress(Address(base, offset), 4468 Address(base, offset + sizeof(int32_t)), 4469 v, 4470 instruction); 4471 maybe_record_implicit_null_check_done = true; 4472 } else { 4473 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); 4474 } 4475 break; 4476 } 4477 4478 case DataType::Type::kUint32: 4479 case DataType::Type::kUint64: 4480 case DataType::Type::kVoid: 4481 LOG(FATAL) << "Unreachable type " << field_type; 4482 UNREACHABLE(); 4483 } 4484 4485 if (!maybe_record_implicit_null_check_done) { 4486 codegen_->MaybeRecordImplicitNullCheck(instruction); 4487 } 4488 4489 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 4490 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 4491 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); 4492 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null); 4493 } 4494 4495 if (is_volatile) { 4496 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); 4497 } 4498 } 4499 4500 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 4501 HandleFieldSet(instruction, instruction->GetFieldInfo()); 4502 } 4503 4504 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 4505 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 4506 } 4507 4508 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 4509 HandleFieldGet(instruction); 4510 } 4511 4512 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 4513 HandleFieldGet(instruction, instruction->GetFieldInfo()); 4514 } 4515 4516 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 4517 HandleFieldGet(instruction); 4518 } 4519 4520 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 4521 HandleFieldGet(instruction, instruction->GetFieldInfo()); 4522 } 4523 4524 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 4525 
HandleFieldSet(instruction, instruction->GetFieldInfo()); 4526 } 4527 4528 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 4529 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 4530 } 4531 4532 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet( 4533 HUnresolvedInstanceFieldGet* instruction) { 4534 FieldAccessCallingConventionX86_64 calling_convention; 4535 codegen_->CreateUnresolvedFieldLocationSummary( 4536 instruction, instruction->GetFieldType(), calling_convention); 4537 } 4538 4539 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet( 4540 HUnresolvedInstanceFieldGet* instruction) { 4541 FieldAccessCallingConventionX86_64 calling_convention; 4542 codegen_->GenerateUnresolvedFieldAccess(instruction, 4543 instruction->GetFieldType(), 4544 instruction->GetFieldIndex(), 4545 instruction->GetDexPc(), 4546 calling_convention); 4547 } 4548 4549 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet( 4550 HUnresolvedInstanceFieldSet* instruction) { 4551 FieldAccessCallingConventionX86_64 calling_convention; 4552 codegen_->CreateUnresolvedFieldLocationSummary( 4553 instruction, instruction->GetFieldType(), calling_convention); 4554 } 4555 4556 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet( 4557 HUnresolvedInstanceFieldSet* instruction) { 4558 FieldAccessCallingConventionX86_64 calling_convention; 4559 codegen_->GenerateUnresolvedFieldAccess(instruction, 4560 instruction->GetFieldType(), 4561 instruction->GetFieldIndex(), 4562 instruction->GetDexPc(), 4563 calling_convention); 4564 } 4565 4566 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet( 4567 HUnresolvedStaticFieldGet* instruction) { 4568 FieldAccessCallingConventionX86_64 calling_convention; 4569 codegen_->CreateUnresolvedFieldLocationSummary( 4570 instruction, instruction->GetFieldType(), calling_convention); 4571 } 4572 4573 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet( 4574 HUnresolvedStaticFieldGet* instruction) { 4575 FieldAccessCallingConventionX86_64 calling_convention; 4576 codegen_->GenerateUnresolvedFieldAccess(instruction, 4577 instruction->GetFieldType(), 4578 instruction->GetFieldIndex(), 4579 instruction->GetDexPc(), 4580 calling_convention); 4581 } 4582 4583 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet( 4584 HUnresolvedStaticFieldSet* instruction) { 4585 FieldAccessCallingConventionX86_64 calling_convention; 4586 codegen_->CreateUnresolvedFieldLocationSummary( 4587 instruction, instruction->GetFieldType(), calling_convention); 4588 } 4589 4590 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet( 4591 HUnresolvedStaticFieldSet* instruction) { 4592 FieldAccessCallingConventionX86_64 calling_convention; 4593 codegen_->GenerateUnresolvedFieldAccess(instruction, 4594 instruction->GetFieldType(), 4595 instruction->GetFieldIndex(), 4596 instruction->GetDexPc(), 4597 calling_convention); 4598 } 4599 4600 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { 4601 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 4602 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() 4603 ? 
Location::RequiresRegister() 4604 : Location::Any(); 4605 locations->SetInAt(0, loc); 4606 } 4607 4608 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) { 4609 if (CanMoveNullCheckToUser(instruction)) { 4610 return; 4611 } 4612 LocationSummary* locations = instruction->GetLocations(); 4613 Location obj = locations->InAt(0); 4614 4615 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0)); 4616 RecordPcInfo(instruction, instruction->GetDexPc()); 4617 } 4618 4619 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) { 4620 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction); 4621 AddSlowPath(slow_path); 4622 4623 LocationSummary* locations = instruction->GetLocations(); 4624 Location obj = locations->InAt(0); 4625 4626 if (obj.IsRegister()) { 4627 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>()); 4628 } else if (obj.IsStackSlot()) { 4629 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); 4630 } else { 4631 DCHECK(obj.IsConstant()) << obj; 4632 DCHECK(obj.GetConstant()->IsNullConstant()); 4633 __ jmp(slow_path->GetEntryLabel()); 4634 return; 4635 } 4636 __ j(kEqual, slow_path->GetEntryLabel()); 4637 } 4638 4639 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { 4640 codegen_->GenerateNullCheck(instruction); 4641 } 4642 4643 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { 4644 bool object_array_get_with_read_barrier = 4645 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 4646 LocationSummary* locations = 4647 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 4648 object_array_get_with_read_barrier 4649 ? LocationSummary::kCallOnSlowPath 4650 : LocationSummary::kNoCall); 4651 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 4652 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 4653 } 4654 locations->SetInAt(0, Location::RequiresRegister()); 4655 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 4656 if (DataType::IsFloatingPointType(instruction->GetType())) { 4657 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 4658 } else { 4659 // The output overlaps for an object array get when read barriers 4660 // are enabled: we do not want the move to overwrite the array's 4661 // location, as we need it to emit the read barrier. 4662 locations->SetOut( 4663 Location::RequiresRegister(), 4664 object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 4665 } 4666 } 4667 4668 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { 4669 LocationSummary* locations = instruction->GetLocations(); 4670 Location obj_loc = locations->InAt(0); 4671 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 4672 Location index = locations->InAt(1); 4673 Location out_loc = locations->Out(); 4674 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); 4675 4676 DataType::Type type = instruction->GetType(); 4677 switch (type) { 4678 case DataType::Type::kBool: 4679 case DataType::Type::kUint8: { 4680 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 4681 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 4682 break; 4683 } 4684 4685 case DataType::Type::kInt8: { 4686 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 4687 __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 4688 break; 4689 } 4690 4691 case DataType::Type::kUint16: { 4692 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 4693 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 4694 // Branch between the compressed and uncompressed cases based on the string's compression flag. 4695 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 4696 NearLabel done, not_compressed; 4697 __ testb(Address(obj, count_offset), Immediate(1)); 4698 codegen_->MaybeRecordImplicitNullCheck(instruction); 4699 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 4700 "Expecting 0=compressed, 1=uncompressed"); 4701 __ j(kNotZero, &not_compressed); 4702 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 4703 __ jmp(&done); 4704 __ Bind(&not_compressed); 4705 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 4706 __ Bind(&done); 4707 } else { 4708 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 4709 } 4710 break; 4711 } 4712 4713 case DataType::Type::kInt16: { 4714 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 4715 __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 4716 break; 4717 } 4718 4719 case DataType::Type::kInt32: { 4720 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 4721 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 4722 break; 4723 } 4724 4725 case DataType::Type::kReference: { 4726 static_assert( 4727 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 4728 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 4729 // /* HeapReference<Object> */ out = 4730 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 4731 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 4732 // Note that a potential implicit null check is handled in this 4733 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. 4734 codegen_->GenerateArrayLoadWithBakerReadBarrier( 4735 instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); 4736 } else { 4737 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 4738 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 4739 codegen_->MaybeRecordImplicitNullCheck(instruction); 4740 // If read barriers are enabled, emit read barriers other than 4741 // Baker's using a slow path (and also unpoison the loaded 4742 // reference, if heap poisoning is enabled).
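// The constant-index and register-index cases below are split because the non-Baker slow-path read barrier needs either the exact byte offset or the separate index location to recompute the reference's address.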
4743 if (index.IsConstant()) { 4744 uint32_t offset = 4745 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; 4746 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); 4747 } else { 4748 codegen_->MaybeGenerateReadBarrierSlow( 4749 instruction, out_loc, out_loc, obj_loc, data_offset, index); 4750 } 4751 } 4752 break; 4753 } 4754 4755 case DataType::Type::kInt64: { 4756 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 4757 __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); 4758 break; 4759 } 4760 4761 case DataType::Type::kFloat32: { 4762 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 4763 __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 4764 break; 4765 } 4766 4767 case DataType::Type::kFloat64: { 4768 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 4769 __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); 4770 break; 4771 } 4772 4773 case DataType::Type::kUint32: 4774 case DataType::Type::kUint64: 4775 case DataType::Type::kVoid: 4776 LOG(FATAL) << "Unreachable type " << type; 4777 UNREACHABLE(); 4778 } 4779 4780 if (type == DataType::Type::kReference) { 4781 // Potential implicit null checks, in the case of reference 4782 // arrays, are handled in the previous switch statement. 4783 } else { 4784 codegen_->MaybeRecordImplicitNullCheck(instruction); 4785 } 4786 } 4787 4788 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { 4789 DataType::Type value_type = instruction->GetComponentType(); 4790 4791 bool needs_write_barrier = 4792 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 4793 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 4794 4795 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 4796 instruction, 4797 may_need_runtime_call_for_type_check ? 4798 LocationSummary::kCallOnSlowPath : 4799 LocationSummary::kNoCall); 4800 4801 locations->SetInAt(0, Location::RequiresRegister()); 4802 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 4803 if (DataType::IsFloatingPointType(value_type)) { 4804 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); 4805 } else { 4806 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); 4807 } 4808 4809 if (needs_write_barrier) { 4810 // Temporary registers for the write barrier. 4811 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
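// Second temporary: used as the `card` register by MarkGCCard (fetched via GetTemp(1) in the code generator below).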
4812 locations->AddTemp(Location::RequiresRegister()); 4813 } 4814 } 4815 4816 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { 4817 LocationSummary* locations = instruction->GetLocations(); 4818 Location array_loc = locations->InAt(0); 4819 CpuRegister array = array_loc.AsRegister<CpuRegister>(); 4820 Location index = locations->InAt(1); 4821 Location value = locations->InAt(2); 4822 DataType::Type value_type = instruction->GetComponentType(); 4823 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 4824 bool needs_write_barrier = 4825 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 4826 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 4827 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 4828 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 4829 4830 switch (value_type) { 4831 case DataType::Type::kBool: 4832 case DataType::Type::kUint8: 4833 case DataType::Type::kInt8: { 4834 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); 4835 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset); 4836 if (value.IsRegister()) { 4837 __ movb(address, value.AsRegister<CpuRegister>()); 4838 } else { 4839 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); 4840 } 4841 codegen_->MaybeRecordImplicitNullCheck(instruction); 4842 break; 4843 } 4844 4845 case DataType::Type::kUint16: 4846 case DataType::Type::kInt16: { 4847 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); 4848 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset); 4849 if (value.IsRegister()) { 4850 __ movw(address, value.AsRegister<CpuRegister>()); 4851 } else { 4852 DCHECK(value.IsConstant()) << value; 4853 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); 4854 } 4855 codegen_->MaybeRecordImplicitNullCheck(instruction); 4856 break; 4857 } 4858 4859 case DataType::Type::kReference: { 4860 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); 4861 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); 4862 4863 if (!value.IsRegister()) { 4864 // Just setting null. 4865 DCHECK(instruction->InputAt(2)->IsNullConstant()); 4866 DCHECK(value.IsConstant()) << value; 4867 __ movl(address, Immediate(0)); 4868 codegen_->MaybeRecordImplicitNullCheck(instruction); 4869 DCHECK(!needs_write_barrier); 4870 DCHECK(!may_need_runtime_call_for_type_check); 4871 break; 4872 } 4873 4874 DCHECK(needs_write_barrier); 4875 CpuRegister register_value = value.AsRegister<CpuRegister>(); 4876 // We cannot use a NearLabel for `done`, as its range may be too 4877 // short when Baker read barriers are enabled. 
4878 Label done; 4879 NearLabel not_null, do_put; 4880 SlowPathCode* slow_path = nullptr; 4881 Location temp_loc = locations->GetTemp(0); 4882 CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); 4883 if (may_need_runtime_call_for_type_check) { 4884 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction); 4885 codegen_->AddSlowPath(slow_path); 4886 if (instruction->GetValueCanBeNull()) { 4887 __ testl(register_value, register_value); 4888 __ j(kNotEqual, &not_null); 4889 __ movl(address, Immediate(0)); 4890 codegen_->MaybeRecordImplicitNullCheck(instruction); 4891 __ jmp(&done); 4892 __ Bind(&not_null); 4893 } 4894 4895 // Note that when Baker read barriers are enabled, the type 4896 // checks are performed without read barriers. This is fine, 4897 // even in the case where a class object is in the from-space 4898 // after the flip, as a comparison involving such a type would 4899 // not produce a false positive; it may of course produce a 4900 // false negative, in which case we would take the ArraySet 4901 // slow path. 4902 4903 // /* HeapReference<Class> */ temp = array->klass_ 4904 __ movl(temp, Address(array, class_offset)); 4905 codegen_->MaybeRecordImplicitNullCheck(instruction); 4906 __ MaybeUnpoisonHeapReference(temp); 4907 4908 // /* HeapReference<Class> */ temp = temp->component_type_ 4909 __ movl(temp, Address(temp, component_offset)); 4910 // If heap poisoning is enabled, no need to unpoison `temp` 4911 // nor the object reference in `register_value->klass`, as 4912 // we are comparing two poisoned references. 4913 __ cmpl(temp, Address(register_value, class_offset)); 4914 4915 if (instruction->StaticTypeOfArrayIsObjectArray()) { 4916 __ j(kEqual, &do_put); 4917 // If heap poisoning is enabled, the `temp` reference has 4918 // not been unpoisoned yet; unpoison it now. 4919 __ MaybeUnpoisonHeapReference(temp); 4920 4921 // If heap poisoning is enabled, no need to unpoison the 4922 // heap reference loaded below, as it is only used for a 4923 // comparison with null.
4924 __ cmpl(Address(temp, super_offset), Immediate(0)); 4925 __ j(kNotEqual, slow_path->GetEntryLabel()); 4926 __ Bind(&do_put); 4927 } else { 4928 __ j(kNotEqual, slow_path->GetEntryLabel()); 4929 } 4930 } 4931 4932 if (kPoisonHeapReferences) { 4933 __ movl(temp, register_value); 4934 __ PoisonHeapReference(temp); 4935 __ movl(address, temp); 4936 } else { 4937 __ movl(address, register_value); 4938 } 4939 if (!may_need_runtime_call_for_type_check) { 4940 codegen_->MaybeRecordImplicitNullCheck(instruction); 4941 } 4942 4943 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); 4944 codegen_->MarkGCCard( 4945 temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull()); 4946 __ Bind(&done); 4947 4948 if (slow_path != nullptr) { 4949 __ Bind(slow_path->GetExitLabel()); 4950 } 4951 4952 break; 4953 } 4954 4955 case DataType::Type::kInt32: { 4956 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); 4957 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); 4958 if (value.IsRegister()) { 4959 __ movl(address, value.AsRegister<CpuRegister>()); 4960 } else { 4961 DCHECK(value.IsConstant()) << value; 4962 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 4963 __ movl(address, Immediate(v)); 4964 } 4965 codegen_->MaybeRecordImplicitNullCheck(instruction); 4966 break; 4967 } 4968 4969 case DataType::Type::kInt64: { 4970 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); 4971 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); 4972 if (value.IsRegister()) { 4973 __ movq(address, value.AsRegister<CpuRegister>()); 4974 codegen_->MaybeRecordImplicitNullCheck(instruction); 4975 } else { 4976 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 4977 Address address_high = 4978 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); 4979 codegen_->MoveInt64ToAddress(address, address_high, v, instruction); 4980 } 4981 break; 4982 } 4983 4984 case DataType::Type::kFloat32: { 4985 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); 4986 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); 4987 if (value.IsFpuRegister()) { 4988 __ movss(address, value.AsFpuRegister<XmmRegister>()); 4989 } else { 4990 DCHECK(value.IsConstant()); 4991 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); 4992 __ movl(address, Immediate(v)); 4993 } 4994 codegen_->MaybeRecordImplicitNullCheck(instruction); 4995 break; 4996 } 4997 4998 case DataType::Type::kFloat64: { 4999 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); 5000 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); 5001 if (value.IsFpuRegister()) { 5002 __ movsd(address, value.AsFpuRegister<XmmRegister>()); 5003 codegen_->MaybeRecordImplicitNullCheck(instruction); 5004 } else { 5005 int64_t v = 5006 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); 5007 Address address_high = 5008 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); 5009 codegen_->MoveInt64ToAddress(address, address_high, v, instruction); 5010 } 5011 break; 5012 } 5013 5014 case DataType::Type::kUint32: 5015 case DataType::Type::kUint64: 5016 case DataType::Type::kVoid: 5017 LOG(FATAL) << "Unreachable type " << instruction->GetType(); 5018 UNREACHABLE(); 5019 } 5020 } 5021 5022 void 
LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) { 5023 LocationSummary* locations = 5024 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5025 locations->SetInAt(0, Location::RequiresRegister()); 5026 if (!instruction->IsEmittedAtUseSite()) { 5027 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5028 } 5029 } 5030 5031 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) { 5032 if (instruction->IsEmittedAtUseSite()) { 5033 return; 5034 } 5035 5036 LocationSummary* locations = instruction->GetLocations(); 5037 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); 5038 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); 5039 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 5040 __ movl(out, Address(obj, offset)); 5041 codegen_->MaybeRecordImplicitNullCheck(instruction); 5042 // Mask out most significant bit in case the array is String's array of char. 5043 if (mirror::kUseStringCompression && instruction->IsStringLength()) { 5044 __ shrl(out, Immediate(1)); 5045 } 5046 } 5047 5048 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { 5049 RegisterSet caller_saves = RegisterSet::Empty(); 5050 InvokeRuntimeCallingConvention calling_convention; 5051 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 5052 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 5053 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); 5054 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); 5055 HInstruction* length = instruction->InputAt(1); 5056 if (!length->IsEmittedAtUseSite()) { 5057 locations->SetInAt(1, Location::RegisterOrConstant(length)); 5058 } 5059 } 5060 5061 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { 5062 LocationSummary* locations = instruction->GetLocations(); 5063 Location index_loc = locations->InAt(0); 5064 Location length_loc = locations->InAt(1); 5065 SlowPathCode* slow_path = 5066 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction); 5067 5068 if (length_loc.IsConstant()) { 5069 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); 5070 if (index_loc.IsConstant()) { 5071 // BCE will remove the bounds check if we are guaranteed to pass. 5072 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); 5073 if (index < 0 || index >= length) { 5074 codegen_->AddSlowPath(slow_path); 5075 __ jmp(slow_path->GetEntryLabel()); 5076 } else { 5077 // Some optimization after BCE may have generated this, and we should not 5078 // generate a bounds check if it is a valid range. 5079 } 5080 return; 5081 } 5082 5083 // We have to reverse the jump condition because the length is the constant. 5084 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>(); 5085 __ cmpl(index_reg, Immediate(length)); 5086 codegen_->AddSlowPath(slow_path); 5087 __ j(kAboveEqual, slow_path->GetEntryLabel()); 5088 } else { 5089 HInstruction* array_length = instruction->InputAt(1); 5090 if (array_length->IsEmittedAtUseSite()) { 5091 // Address the length field in the array.
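// Since the HArrayLength was emitted at its use site, the length is read straight from the array object here rather than from a previously materialized register.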
5092 DCHECK(array_length->IsArrayLength()); 5093 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); 5094 Location array_loc = array_length->GetLocations()->InAt(0); 5095 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); 5096 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 5097 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for 5098 // the string compression flag) with the in-memory length and avoid the temporary. 5099 CpuRegister length_reg = CpuRegister(TMP); 5100 __ movl(length_reg, array_len); 5101 codegen_->MaybeRecordImplicitNullCheck(array_length); 5102 __ shrl(length_reg, Immediate(1)); 5103 codegen_->GenerateIntCompare(length_reg, index_loc); 5104 } else { 5105 // Checking the bound for the general case: 5106 // an array of char, or String's backing array when the compression feature is off. 5107 if (index_loc.IsConstant()) { 5108 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); 5109 __ cmpl(array_len, Immediate(value)); 5110 } else { 5111 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>()); 5112 } 5113 codegen_->MaybeRecordImplicitNullCheck(array_length); 5114 } 5115 } else { 5116 codegen_->GenerateIntCompare(length_loc, index_loc); 5117 } 5118 codegen_->AddSlowPath(slow_path); 5119 __ j(kBelowEqual, slow_path->GetEntryLabel()); 5120 } 5121 } 5122 5123 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, 5124 CpuRegister card, 5125 CpuRegister object, 5126 CpuRegister value, 5127 bool value_can_be_null) { 5128 NearLabel is_null; 5129 if (value_can_be_null) { 5130 __ testl(value, value); 5131 __ j(kEqual, &is_null); 5132 } 5133 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(), 5134 /* no_rip */ true)); 5135 __ movq(temp, object); 5136 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); 5137 __ movb(Address(temp, card, TIMES_1, 0), card); 5138 if (value_can_be_null) { 5139 __ Bind(&is_null); 5140 } 5141 } 5142 5143 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 5144 LOG(FATAL) << "Unimplemented"; 5145 } 5146 5147 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) { 5148 if (instruction->GetNext()->IsSuspendCheck() && 5149 instruction->GetBlock()->GetLoopInformation() != nullptr) { 5150 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); 5151 // The back edge will generate the suspend check. 5152 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); 5153 } 5154 5155 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 5156 } 5157 5158 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { 5159 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5160 instruction, LocationSummary::kCallOnSlowPath); 5161 // In suspend check slow path, usually there are no caller-save registers at all. 5162 // If SIMD instructions are present, however, we force spilling all live SIMD 5163 // registers in full width (since the runtime only saves/restores lower part). 5164 locations->SetCustomSlowPathCallerSaves( 5165 GetGraph()->HasSIMD() ?
RegisterSet::AllFpu() : RegisterSet::Empty()); 5166 } 5167 5168 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { 5169 HBasicBlock* block = instruction->GetBlock(); 5170 if (block->GetLoopInformation() != nullptr) { 5171 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 5172 // The back edge will generate the suspend check. 5173 return; 5174 } 5175 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 5176 // The goto will generate the suspend check. 5177 return; 5178 } 5179 GenerateSuspendCheck(instruction, nullptr); 5180 } 5181 5182 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, 5183 HBasicBlock* successor) { 5184 SuspendCheckSlowPathX86_64* slow_path = 5185 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); 5186 if (slow_path == nullptr) { 5187 slow_path = 5188 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor); 5189 instruction->SetSlowPath(slow_path); 5190 codegen_->AddSlowPath(slow_path); 5191 if (successor != nullptr) { 5192 DCHECK(successor->IsLoopHeader()); 5193 } 5194 } else { 5195 DCHECK_EQ(slow_path->GetSuccessor(), successor); 5196 } 5197 5198 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(), 5199 /* no_rip */ true), 5200 Immediate(0)); 5201 if (successor == nullptr) { 5202 __ j(kNotEqual, slow_path->GetEntryLabel()); 5203 __ Bind(slow_path->GetReturnLabel()); 5204 } else { 5205 __ j(kEqual, codegen_->GetLabelOf(successor)); 5206 __ jmp(slow_path->GetEntryLabel()); 5207 } 5208 } 5209 5210 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { 5211 return codegen_->GetAssembler(); 5212 } 5213 5214 void ParallelMoveResolverX86_64::EmitMove(size_t index) { 5215 MoveOperands* move = moves_[index]; 5216 Location source = move->GetSource(); 5217 Location destination = move->GetDestination(); 5218 5219 if (source.IsRegister()) { 5220 if (destination.IsRegister()) { 5221 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); 5222 } else if (destination.IsStackSlot()) { 5223 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), 5224 source.AsRegister<CpuRegister>()); 5225 } else { 5226 DCHECK(destination.IsDoubleStackSlot()); 5227 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), 5228 source.AsRegister<CpuRegister>()); 5229 } 5230 } else if (source.IsStackSlot()) { 5231 if (destination.IsRegister()) { 5232 __ movl(destination.AsRegister<CpuRegister>(), 5233 Address(CpuRegister(RSP), source.GetStackIndex())); 5234 } else if (destination.IsFpuRegister()) { 5235 __ movss(destination.AsFpuRegister<XmmRegister>(), 5236 Address(CpuRegister(RSP), source.GetStackIndex())); 5237 } else { 5238 DCHECK(destination.IsStackSlot()); 5239 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5240 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5241 } 5242 } else if (source.IsDoubleStackSlot()) { 5243 if (destination.IsRegister()) { 5244 __ movq(destination.AsRegister<CpuRegister>(), 5245 Address(CpuRegister(RSP), source.GetStackIndex())); 5246 } else if (destination.IsFpuRegister()) { 5247 __ movsd(destination.AsFpuRegister<XmmRegister>(), 5248 Address(CpuRegister(RSP), source.GetStackIndex())); 5249 } else { 5250 DCHECK(destination.IsDoubleStackSlot()) << destination; 5251 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5252 __ 
movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5253 } 5254 } else if (source.IsSIMDStackSlot()) { 5255 if (destination.IsFpuRegister()) { 5256 __ movups(destination.AsFpuRegister<XmmRegister>(), 5257 Address(CpuRegister(RSP), source.GetStackIndex())); 5258 } else { 5259 DCHECK(destination.IsSIMDStackSlot()); 5260 size_t high = kX86_64WordSize; 5261 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5262 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5263 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high)); 5264 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP)); 5265 } 5266 } else if (source.IsConstant()) { 5267 HConstant* constant = source.GetConstant(); 5268 if (constant->IsIntConstant() || constant->IsNullConstant()) { 5269 int32_t value = CodeGenerator::GetInt32ValueOf(constant); 5270 if (destination.IsRegister()) { 5271 if (value == 0) { 5272 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); 5273 } else { 5274 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value)); 5275 } 5276 } else { 5277 DCHECK(destination.IsStackSlot()) << destination; 5278 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); 5279 } 5280 } else if (constant->IsLongConstant()) { 5281 int64_t value = constant->AsLongConstant()->GetValue(); 5282 if (destination.IsRegister()) { 5283 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value); 5284 } else { 5285 DCHECK(destination.IsDoubleStackSlot()) << destination; 5286 codegen_->Store64BitValueToStack(destination, value); 5287 } 5288 } else if (constant->IsFloatConstant()) { 5289 float fp_value = constant->AsFloatConstant()->GetValue(); 5290 if (destination.IsFpuRegister()) { 5291 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 5292 codegen_->Load32BitValue(dest, fp_value); 5293 } else { 5294 DCHECK(destination.IsStackSlot()) << destination; 5295 Immediate imm(bit_cast<int32_t, float>(fp_value)); 5296 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); 5297 } 5298 } else { 5299 DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); 5300 double fp_value = constant->AsDoubleConstant()->GetValue(); 5301 int64_t value = bit_cast<int64_t, double>(fp_value); 5302 if (destination.IsFpuRegister()) { 5303 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 5304 codegen_->Load64BitValue(dest, fp_value); 5305 } else { 5306 DCHECK(destination.IsDoubleStackSlot()) << destination; 5307 codegen_->Store64BitValueToStack(destination, value); 5308 } 5309 } 5310 } else if (source.IsFpuRegister()) { 5311 if (destination.IsFpuRegister()) { 5312 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); 5313 } else if (destination.IsStackSlot()) { 5314 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), 5315 source.AsFpuRegister<XmmRegister>()); 5316 } else if (destination.IsDoubleStackSlot()) { 5317 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), 5318 source.AsFpuRegister<XmmRegister>()); 5319 } else { 5320 DCHECK(destination.IsSIMDStackSlot()); 5321 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()), 5322 source.AsFpuRegister<XmmRegister>()); 5323 } 5324 } 5325 } 5326 5327 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) { 5328 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5329 __ 
movl(Address(CpuRegister(RSP), mem), reg); 5330 __ movl(reg, CpuRegister(TMP)); 5331 } 5332 5333 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { 5334 __ movq(CpuRegister(TMP), reg1); 5335 __ movq(reg1, reg2); 5336 __ movq(reg2, CpuRegister(TMP)); 5337 } 5338 5339 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { 5340 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5341 __ movq(Address(CpuRegister(RSP), mem), reg); 5342 __ movq(reg, CpuRegister(TMP)); 5343 } 5344 5345 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { 5346 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5347 __ movss(Address(CpuRegister(RSP), mem), reg); 5348 __ movd(reg, CpuRegister(TMP)); 5349 } 5350 5351 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { 5352 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5353 __ movsd(Address(CpuRegister(RSP), mem), reg); 5354 __ movd(reg, CpuRegister(TMP)); 5355 } 5356 5357 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) { 5358 size_t extra_slot = 2 * kX86_64WordSize; 5359 __ subq(CpuRegister(RSP), Immediate(extra_slot)); 5360 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg)); 5361 ExchangeMemory64(0, mem + extra_slot, 2); 5362 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0)); 5363 __ addq(CpuRegister(RSP), Immediate(extra_slot)); 5364 } 5365 5366 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) { 5367 ScratchRegisterScope ensure_scratch( 5368 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); 5369 5370 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; 5371 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); 5372 __ movl(CpuRegister(ensure_scratch.GetRegister()), 5373 Address(CpuRegister(RSP), mem2 + stack_offset)); 5374 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); 5375 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), 5376 CpuRegister(ensure_scratch.GetRegister())); 5377 } 5378 5379 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) { 5380 ScratchRegisterScope ensure_scratch( 5381 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); 5382 5383 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; 5384 5385 // Now that temp registers are available (possibly spilled), exchange blocks of memory. 
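// Each iteration swaps one quadword between the two stack slots, going through TMP and the acquired scratch register.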
5386 for (int i = 0; i < num_of_qwords; i++) { 5387 __ movq(CpuRegister(TMP), 5388 Address(CpuRegister(RSP), mem1 + stack_offset)); 5389 __ movq(CpuRegister(ensure_scratch.GetRegister()), 5390 Address(CpuRegister(RSP), mem2 + stack_offset)); 5391 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), 5392 CpuRegister(TMP)); 5393 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), 5394 CpuRegister(ensure_scratch.GetRegister())); 5395 stack_offset += kX86_64WordSize; 5396 } 5397 } 5398 5399 void ParallelMoveResolverX86_64::EmitSwap(size_t index) { 5400 MoveOperands* move = moves_[index]; 5401 Location source = move->GetSource(); 5402 Location destination = move->GetDestination(); 5403 5404 if (source.IsRegister() && destination.IsRegister()) { 5405 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); 5406 } else if (source.IsRegister() && destination.IsStackSlot()) { 5407 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); 5408 } else if (source.IsStackSlot() && destination.IsRegister()) { 5409 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); 5410 } else if (source.IsStackSlot() && destination.IsStackSlot()) { 5411 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex()); 5412 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { 5413 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); 5414 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { 5415 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); 5416 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { 5417 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1); 5418 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { 5419 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>()); 5420 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>()); 5421 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP)); 5422 } else if (source.IsFpuRegister() && destination.IsStackSlot()) { 5423 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5424 } else if (source.IsStackSlot() && destination.IsFpuRegister()) { 5425 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5426 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { 5427 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5428 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) { 5429 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5430 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) { 5431 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2); 5432 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) { 5433 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5434 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) { 5435 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5436 } else { 5437 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination; 5438 } 5439 } 5440 5441 5442 void ParallelMoveResolverX86_64::SpillScratch(int reg) { 5443 __ pushq(CpuRegister(reg)); 5444 } 5445 5446 5447 void ParallelMoveResolverX86_64::RestoreScratch(int reg) { 5448 __ popq(CpuRegister(reg)); 5449 } 5450 5451 void 
InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( 5452 SlowPathCode* slow_path, CpuRegister class_reg) { 5453 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 5454 const size_t status_byte_offset = 5455 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 5456 constexpr uint32_t shifted_initialized_value = 5457 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); 5458 5459 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); 5460 __ j(kBelow, slow_path->GetEntryLabel()); 5461 __ Bind(slow_path->GetExitLabel()); 5462 // No need for memory fence, thanks to the x86-64 memory model. 5463 } 5464 5465 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( 5466 HLoadClass::LoadKind desired_class_load_kind) { 5467 switch (desired_class_load_kind) { 5468 case HLoadClass::LoadKind::kInvalid: 5469 LOG(FATAL) << "UNREACHABLE"; 5470 UNREACHABLE(); 5471 case HLoadClass::LoadKind::kReferrersClass: 5472 break; 5473 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 5474 case HLoadClass::LoadKind::kBootImageClassTable: 5475 case HLoadClass::LoadKind::kBssEntry: 5476 DCHECK(!Runtime::Current()->UseJitCompilation()); 5477 break; 5478 case HLoadClass::LoadKind::kJitTableAddress: 5479 DCHECK(Runtime::Current()->UseJitCompilation()); 5480 break; 5481 case HLoadClass::LoadKind::kBootImageAddress: 5482 case HLoadClass::LoadKind::kRuntimeCall: 5483 break; 5484 } 5485 return desired_class_load_kind; 5486 } 5487 5488 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { 5489 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 5490 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 5491 // Custom calling convention: RAX serves as both input and output. 5492 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 5493 cls, 5494 Location::RegisterLocation(RAX), 5495 Location::RegisterLocation(RAX)); 5496 return; 5497 } 5498 DCHECK(!cls->NeedsAccessCheck()); 5499 5500 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 5501 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 5502 ? LocationSummary::kCallOnSlowPath 5503 : LocationSummary::kNoCall; 5504 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); 5505 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 5506 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 5507 } 5508 5509 if (load_kind == HLoadClass::LoadKind::kReferrersClass) { 5510 locations->SetInAt(0, Location::RequiresRegister()); 5511 } 5512 locations->SetOut(Location::RequiresRegister()); 5513 if (load_kind == HLoadClass::LoadKind::kBssEntry) { 5514 if (!kUseReadBarrier || kUseBakerReadBarrier) { 5515 // Rely on the type resolution and/or initialization to save everything. 5516 // Custom calling convention: RAX serves as both input and output. 5517 RegisterSet caller_saves = RegisterSet::Empty(); 5518 caller_saves.Add(Location::RegisterLocation(RAX)); 5519 locations->SetCustomSlowPathCallerSaves(caller_saves); 5520 } else { 5521 // For non-Baker read barrier we have a temp-clobbering call. 
5522 } 5523 } 5524 } 5525 5526 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, 5527 dex::TypeIndex type_index, 5528 Handle<mirror::Class> handle) { 5529 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); 5530 // Add a patch entry and return the label. 5531 jit_class_patches_.emplace_back(&dex_file, type_index.index_); 5532 PatchInfo<Label>* info = &jit_class_patches_.back(); 5533 return &info->label; 5534 } 5535 5536 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 5537 // move. 5538 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { 5539 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 5540 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 5541 codegen_->GenerateLoadClassRuntimeCall(cls); 5542 return; 5543 } 5544 DCHECK(!cls->NeedsAccessCheck()); 5545 5546 LocationSummary* locations = cls->GetLocations(); 5547 Location out_loc = locations->Out(); 5548 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5549 5550 const ReadBarrierOption read_barrier_option = cls->IsInBootImage() 5551 ? kWithoutReadBarrier 5552 : kCompilerReadBarrierOption; 5553 bool generate_null_check = false; 5554 switch (load_kind) { 5555 case HLoadClass::LoadKind::kReferrersClass: { 5556 DCHECK(!cls->CanCallRuntime()); 5557 DCHECK(!cls->MustGenerateClinitCheck()); 5558 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 5559 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); 5560 GenerateGcRootFieldLoad( 5561 cls, 5562 out_loc, 5563 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), 5564 /* fixup_label */ nullptr, 5565 read_barrier_option); 5566 break; 5567 } 5568 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 5569 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 5570 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 5571 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); 5572 codegen_->RecordBootImageTypePatch(cls); 5573 break; 5574 case HLoadClass::LoadKind::kBootImageAddress: { 5575 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 5576 uint32_t address = dchecked_integral_cast<uint32_t>( 5577 reinterpret_cast<uintptr_t>(cls->GetClass().Get())); 5578 DCHECK_NE(address, 0u); 5579 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 5580 break; 5581 } 5582 case HLoadClass::LoadKind::kBootImageClassTable: { 5583 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 5584 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); 5585 codegen_->RecordBootImageTypePatch(cls); 5586 // Extract the reference from the slot data, i.e. clear the hash bits. 
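// The ClassTable slot value combines the class reference with a masked hash of the type descriptor; recomputing that hash at compile time lets the generated code strip it with a single subtraction (or emit nothing when the masked hash is zero).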
5587 int32_t masked_hash = ClassTable::TableSlot::MaskHash( 5588 ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); 5589 if (masked_hash != 0) { 5590 __ subl(out, Immediate(masked_hash)); 5591 } 5592 break; 5593 } 5594 case HLoadClass::LoadKind::kBssEntry: { 5595 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 5596 /* no_rip */ false); 5597 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls); 5598 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ 5599 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); 5600 generate_null_check = true; 5601 break; 5602 } 5603 case HLoadClass::LoadKind::kJitTableAddress: { 5604 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 5605 /* no_rip */ true); 5606 Label* fixup_label = 5607 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass()); 5608 // /* GcRoot<mirror::Class> */ out = *address 5609 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); 5610 break; 5611 } 5612 default: 5613 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind(); 5614 UNREACHABLE(); 5615 } 5616 5617 if (generate_null_check || cls->MustGenerateClinitCheck()) { 5618 DCHECK(cls->CanCallRuntime()); 5619 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( 5620 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); 5621 codegen_->AddSlowPath(slow_path); 5622 if (generate_null_check) { 5623 __ testl(out, out); 5624 __ j(kEqual, slow_path->GetEntryLabel()); 5625 } 5626 if (cls->MustGenerateClinitCheck()) { 5627 GenerateClassInitializationCheck(slow_path, out); 5628 } else { 5629 __ Bind(slow_path->GetExitLabel()); 5630 } 5631 } 5632 } 5633 5634 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) { 5635 LocationSummary* locations = 5636 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 5637 locations->SetInAt(0, Location::RequiresRegister()); 5638 if (check->HasUses()) { 5639 locations->SetOut(Location::SameAsFirstInput()); 5640 } 5641 } 5642 5643 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { 5644 // We assume the class is not null.
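// The trailing `true` plays the role of cls->MustGenerateClinitCheck() in VisitLoadClass above: the slow path must initialize the class, not merely load it.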
5645 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( 5646 check->GetLoadClass(), check, check->GetDexPc(), true); 5647 codegen_->AddSlowPath(slow_path); 5648 GenerateClassInitializationCheck(slow_path, 5649 check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); 5650 } 5651 5652 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( 5653 HLoadString::LoadKind desired_string_load_kind) { 5654 switch (desired_string_load_kind) { 5655 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 5656 case HLoadString::LoadKind::kBootImageInternTable: 5657 case HLoadString::LoadKind::kBssEntry: 5658 DCHECK(!Runtime::Current()->UseJitCompilation()); 5659 break; 5660 case HLoadString::LoadKind::kJitTableAddress: 5661 DCHECK(Runtime::Current()->UseJitCompilation()); 5662 break; 5663 case HLoadString::LoadKind::kBootImageAddress: 5664 case HLoadString::LoadKind::kRuntimeCall: 5665 break; 5666 } 5667 return desired_string_load_kind; 5668 } 5669 5670 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { 5671 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 5672 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); 5673 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { 5674 locations->SetOut(Location::RegisterLocation(RAX)); 5675 } else { 5676 locations->SetOut(Location::RequiresRegister()); 5677 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { 5678 if (!kUseReadBarrier || kUseBakerReadBarrier) { 5679 // Rely on the pResolveString to save everything. 5680 // Custom calling convention: RAX serves as both input and output. 5681 RegisterSet caller_saves = RegisterSet::Empty(); 5682 caller_saves.Add(Location::RegisterLocation(RAX)); 5683 locations->SetCustomSlowPathCallerSaves(caller_saves); 5684 } else { 5685 // For non-Baker read barrier we have a temp-clobbering call. 5686 } 5687 } 5688 } 5689 } 5690 5691 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, 5692 dex::StringIndex string_index, 5693 Handle<mirror::String> handle) { 5694 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); 5695 // Add a patch entry and return the label. 5696 jit_string_patches_.emplace_back(&dex_file, string_index.index_); 5697 PatchInfo<Label>* info = &jit_string_patches_.back(); 5698 return &info->label; 5699 } 5700 5701 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 5702 // move. 5703 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { 5704 LocationSummary* locations = load->GetLocations(); 5705 Location out_loc = locations->Out(); 5706 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5707 5708 switch (load->GetLoadKind()) { 5709 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { 5710 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 5711 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); 5712 codegen_->RecordBootImageStringPatch(load); 5713 return; 5714 } 5715 case HLoadString::LoadKind::kBootImageAddress: { 5716 uint32_t address = dchecked_integral_cast<uint32_t>( 5717 reinterpret_cast<uintptr_t>(load->GetString().Get())); 5718 DCHECK_NE(address, 0u); 5719 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 
5720 return; 5721 } 5722 case HLoadString::LoadKind::kBootImageInternTable: { 5723 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 5724 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); 5725 codegen_->RecordBootImageStringPatch(load); 5726 return; 5727 } 5728 case HLoadString::LoadKind::kBssEntry: { 5729 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 5730 /* no_rip */ false); 5731 Label* fixup_label = codegen_->NewStringBssEntryPatch(load); 5732 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ 5733 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); 5734 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); 5735 codegen_->AddSlowPath(slow_path); 5736 __ testl(out, out); 5737 __ j(kEqual, slow_path->GetEntryLabel()); 5738 __ Bind(slow_path->GetExitLabel()); 5739 return; 5740 } 5741 case HLoadString::LoadKind::kJitTableAddress: { 5742 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 5743 /* no_rip */ true); 5744 Label* fixup_label = codegen_->NewJitRootStringPatch( 5745 load->GetDexFile(), load->GetStringIndex(), load->GetString()); 5746 // /* GcRoot<mirror::String> */ out = *address 5747 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); 5748 return; 5749 } 5750 default: 5751 break; 5752 } 5753 5754 // TODO: Re-add the compiler code to do string dex cache lookup again. 5755 // Custom calling convention: RAX serves as both input and output. 5756 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_)); 5757 codegen_->InvokeRuntime(kQuickResolveString, 5758 load, 5759 load->GetDexPc()); 5760 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 5761 } 5762 5763 static Address GetExceptionTlsAddress() { 5764 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(), 5765 /* no_rip */ true); 5766 } 5767 5768 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { 5769 LocationSummary* locations = 5770 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); 5771 locations->SetOut(Location::RequiresRegister()); 5772 } 5773 5774 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) { 5775 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress()); 5776 } 5777 5778 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) { 5779 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); 5780 } 5781 5782 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 5783 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0)); 5784 } 5785 5786 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) { 5787 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5788 instruction, LocationSummary::kCallOnMainOnly); 5789 InvokeRuntimeCallingConvention calling_convention; 5790 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 5791 } 5792 5793 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { 5794 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 5795 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 5796 } 5797 5798 static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { 5799 if 
(type_check_kind == TypeCheckKind::kInterfaceCheck) { 5800 // We need a temporary for holding the iftable length. 5801 return true; 5802 } 5803 return kEmitCompilerReadBarrier && 5804 !kUseBakerReadBarrier && 5805 (type_check_kind == TypeCheckKind::kAbstractClassCheck || 5806 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 5807 type_check_kind == TypeCheckKind::kArrayObjectCheck); 5808 } 5809 5810 static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { 5811 return kEmitCompilerReadBarrier && 5812 !kUseBakerReadBarrier && 5813 (type_check_kind == TypeCheckKind::kAbstractClassCheck || 5814 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 5815 type_check_kind == TypeCheckKind::kArrayObjectCheck); 5816 } 5817 5818 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { 5819 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 5820 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 5821 bool baker_read_barrier_slow_path = false; 5822 switch (type_check_kind) { 5823 case TypeCheckKind::kExactCheck: 5824 case TypeCheckKind::kAbstractClassCheck: 5825 case TypeCheckKind::kClassHierarchyCheck: 5826 case TypeCheckKind::kArrayObjectCheck: { 5827 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); 5828 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 5829 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; 5830 break; 5831 } 5832 case TypeCheckKind::kArrayCheck: 5833 case TypeCheckKind::kUnresolvedCheck: 5834 case TypeCheckKind::kInterfaceCheck: 5835 call_kind = LocationSummary::kCallOnSlowPath; 5836 break; 5837 } 5838 5839 LocationSummary* locations = 5840 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 5841 if (baker_read_barrier_slow_path) { 5842 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 5843 } 5844 locations->SetInAt(0, Location::RequiresRegister()); 5845 locations->SetInAt(1, Location::Any()); 5846 // Note that TypeCheckSlowPathX86_64 uses this "out" register too. 5847 locations->SetOut(Location::RequiresRegister()); 5848 // When read barriers are enabled, we need a temporary register for 5849 // some cases. 5850 if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) { 5851 locations->AddTemp(Location::RequiresRegister()); 5852 } 5853 } 5854 5855 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { 5856 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 5857 LocationSummary* locations = instruction->GetLocations(); 5858 Location obj_loc = locations->InAt(0); 5859 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 5860 Location cls = locations->InAt(1); 5861 Location out_loc = locations->Out(); 5862 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5863 Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ? 5864 locations->GetTemp(0) : 5865 Location::NoLocation(); 5866 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 5867 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 5868 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 5869 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 5870 SlowPathCode* slow_path = nullptr; 5871 NearLabel done, zero; 5872 5873 // Return 0 if `obj` is null. 5874 // Avoid null check if we know obj is not null. 
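// (MustDoNullCheck() is false when the compiler has already established that `obj` is non-null.)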
5875 if (instruction->MustDoNullCheck()) { 5876 __ testl(obj, obj); 5877 __ j(kEqual, &zero); 5878 } 5879 5880 switch (type_check_kind) { 5881 case TypeCheckKind::kExactCheck: { 5882 ReadBarrierOption read_barrier_option = 5883 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 5884 // /* HeapReference<Class> */ out = obj->klass_ 5885 GenerateReferenceLoadTwoRegisters(instruction, 5886 out_loc, 5887 obj_loc, 5888 class_offset, 5889 read_barrier_option); 5890 if (cls.IsRegister()) { 5891 __ cmpl(out, cls.AsRegister<CpuRegister>()); 5892 } else { 5893 DCHECK(cls.IsStackSlot()) << cls; 5894 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 5895 } 5896 if (zero.IsLinked()) { 5897 // Classes must be equal for the instanceof to succeed. 5898 __ j(kNotEqual, &zero); 5899 __ movl(out, Immediate(1)); 5900 __ jmp(&done); 5901 } else { 5902 __ setcc(kEqual, out); 5903 // setcc only sets the low byte. 5904 __ andl(out, Immediate(1)); 5905 } 5906 break; 5907 } 5908 5909 case TypeCheckKind::kAbstractClassCheck: { 5910 ReadBarrierOption read_barrier_option = 5911 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 5912 // /* HeapReference<Class> */ out = obj->klass_ 5913 GenerateReferenceLoadTwoRegisters(instruction, 5914 out_loc, 5915 obj_loc, 5916 class_offset, 5917 read_barrier_option); 5918 // If the class is abstract, we eagerly fetch the super class of the 5919 // object to avoid doing a comparison we know will fail. 5920 NearLabel loop, success; 5921 __ Bind(&loop); 5922 // /* HeapReference<Class> */ out = out->super_class_ 5923 GenerateReferenceLoadOneRegister(instruction, 5924 out_loc, 5925 super_offset, 5926 maybe_temp_loc, 5927 read_barrier_option); 5928 __ testl(out, out); 5929 // If `out` is null, we use it for the result, and jump to `done`. 5930 __ j(kEqual, &done); 5931 if (cls.IsRegister()) { 5932 __ cmpl(out, cls.AsRegister<CpuRegister>()); 5933 } else { 5934 DCHECK(cls.IsStackSlot()) << cls; 5935 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 5936 } 5937 __ j(kNotEqual, &loop); 5938 __ movl(out, Immediate(1)); 5939 if (zero.IsLinked()) { 5940 __ jmp(&done); 5941 } 5942 break; 5943 } 5944 5945 case TypeCheckKind::kClassHierarchyCheck: { 5946 ReadBarrierOption read_barrier_option = 5947 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 5948 // /* HeapReference<Class> */ out = obj->klass_ 5949 GenerateReferenceLoadTwoRegisters(instruction, 5950 out_loc, 5951 obj_loc, 5952 class_offset, 5953 read_barrier_option); 5954 // Walk over the class hierarchy to find a match. 5955 NearLabel loop, success; 5956 __ Bind(&loop); 5957 if (cls.IsRegister()) { 5958 __ cmpl(out, cls.AsRegister<CpuRegister>()); 5959 } else { 5960 DCHECK(cls.IsStackSlot()) << cls; 5961 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 5962 } 5963 __ j(kEqual, &success); 5964 // /* HeapReference<Class> */ out = out->super_class_ 5965 GenerateReferenceLoadOneRegister(instruction, 5966 out_loc, 5967 super_offset, 5968 maybe_temp_loc, 5969 read_barrier_option); 5970 __ testl(out, out); 5971 __ j(kNotEqual, &loop); 5972 // If `out` is null, we use it for the result, and jump to `done`. 
5973 __ jmp(&done); 5974 __ Bind(&success); 5975 __ movl(out, Immediate(1)); 5976 if (zero.IsLinked()) { 5977 __ jmp(&done); 5978 } 5979 break; 5980 } 5981 5982 case TypeCheckKind::kArrayObjectCheck: { 5983 ReadBarrierOption read_barrier_option = 5984 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 5985 // /* HeapReference<Class> */ out = obj->klass_ 5986 GenerateReferenceLoadTwoRegisters(instruction, 5987 out_loc, 5988 obj_loc, 5989 class_offset, 5990 read_barrier_option); 5991 // Do an exact check. 5992 NearLabel exact_check; 5993 if (cls.IsRegister()) { 5994 __ cmpl(out, cls.AsRegister<CpuRegister>()); 5995 } else { 5996 DCHECK(cls.IsStackSlot()) << cls; 5997 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 5998 } 5999 __ j(kEqual, &exact_check); 6000 // Otherwise, we need to check that the object's class is a non-primitive array. 6001 // /* HeapReference<Class> */ out = out->component_type_ 6002 GenerateReferenceLoadOneRegister(instruction, 6003 out_loc, 6004 component_offset, 6005 maybe_temp_loc, 6006 read_barrier_option); 6007 __ testl(out, out); 6008 // If `out` is null, we use it for the result, and jump to `done`. 6009 __ j(kEqual, &done); 6010 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot)); 6011 __ j(kNotEqual, &zero); 6012 __ Bind(&exact_check); 6013 __ movl(out, Immediate(1)); 6014 __ jmp(&done); 6015 break; 6016 } 6017 6018 case TypeCheckKind::kArrayCheck: { 6019 // No read barrier since the slow path will retry upon failure. 6020 // /* HeapReference<Class> */ out = obj->klass_ 6021 GenerateReferenceLoadTwoRegisters(instruction, 6022 out_loc, 6023 obj_loc, 6024 class_offset, 6025 kWithoutReadBarrier); 6026 if (cls.IsRegister()) { 6027 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6028 } else { 6029 DCHECK(cls.IsStackSlot()) << cls; 6030 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6031 } 6032 DCHECK(locations->OnlyCallsOnSlowPath()); 6033 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6034 instruction, /* is_fatal */ false); 6035 codegen_->AddSlowPath(slow_path); 6036 __ j(kNotEqual, slow_path->GetEntryLabel()); 6037 __ movl(out, Immediate(1)); 6038 if (zero.IsLinked()) { 6039 __ jmp(&done); 6040 } 6041 break; 6042 } 6043 6044 case TypeCheckKind::kUnresolvedCheck: 6045 case TypeCheckKind::kInterfaceCheck: { 6046 // Note that we indeed only call on slow path, but we always go 6047 // into the slow path for the unresolved and interface check 6048 // cases. 6049 // 6050 // We cannot directly call the InstanceofNonTrivial runtime 6051 // entry point without resorting to a type checking slow path 6052 // here (i.e. by calling InvokeRuntime directly), as it would 6053 // require to assign fixed registers for the inputs of this 6054 // HInstanceOf instruction (following the runtime calling 6055 // convention), which might be cluttered by the potential first 6056 // read barrier emission at the beginning of this method. 6057 // 6058 // TODO: Introduce a new runtime entry point taking the object 6059 // to test (instead of its class) as argument, and let it deal 6060 // with the read barrier issues. This will let us refactor this 6061 // case of the `switch` code as it was previously (with a direct 6062 // call to the runtime not using a type checking slow path). 6063 // This should also be beneficial for the other cases above. 
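  // Net effect of the code below: we unconditionally enter TypeCheckSlowPathX86_64, which for an
  // HInstanceOf is expected to call the instanceof runtime entry point and write the result back
  // into `out` before returning through the slow path exit label.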
6064 DCHECK(locations->OnlyCallsOnSlowPath()); 6065 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6066 instruction, /* is_fatal */ false); 6067 codegen_->AddSlowPath(slow_path); 6068 __ jmp(slow_path->GetEntryLabel()); 6069 if (zero.IsLinked()) { 6070 __ jmp(&done); 6071 } 6072 break; 6073 } 6074 } 6075 6076 if (zero.IsLinked()) { 6077 __ Bind(&zero); 6078 __ xorl(out, out); 6079 } 6080 6081 if (done.IsLinked()) { 6082 __ Bind(&done); 6083 } 6084 6085 if (slow_path != nullptr) { 6086 __ Bind(slow_path->GetExitLabel()); 6087 } 6088 } 6089 6090 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { 6091 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6092 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); 6093 LocationSummary* locations = 6094 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 6095 locations->SetInAt(0, Location::RequiresRegister()); 6096 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 6097 // Require a register for the interface check since there is a loop that compares the class to 6098 // a memory address. 6099 locations->SetInAt(1, Location::RequiresRegister()); 6100 } else { 6101 locations->SetInAt(1, Location::Any()); 6102 } 6103 6104 // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. 6105 locations->AddTemp(Location::RequiresRegister()); 6106 // When read barriers are enabled, we need an additional temporary 6107 // register for some cases. 6108 if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { 6109 locations->AddTemp(Location::RequiresRegister()); 6110 } 6111 } 6112 6113 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { 6114 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6115 LocationSummary* locations = instruction->GetLocations(); 6116 Location obj_loc = locations->InAt(0); 6117 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 6118 Location cls = locations->InAt(1); 6119 Location temp_loc = locations->GetTemp(0); 6120 CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); 6121 Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? 6122 locations->GetTemp(1) : 6123 Location::NoLocation(); 6124 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 6125 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 6126 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 6127 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 6128 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 6129 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 6130 const uint32_t object_array_data_offset = 6131 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 6132 6133 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); 6134 SlowPathCode* type_check_slow_path = 6135 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6136 instruction, is_type_check_slow_path_fatal); 6137 codegen_->AddSlowPath(type_check_slow_path); 6138 6139 6140 NearLabel done; 6141 // Avoid null check if we know obj is not null. 
6142 if (instruction->MustDoNullCheck()) { 6143 __ testl(obj, obj); 6144 __ j(kEqual, &done); 6145 } 6146 6147 switch (type_check_kind) { 6148 case TypeCheckKind::kExactCheck: 6149 case TypeCheckKind::kArrayCheck: { 6150 // /* HeapReference<Class> */ temp = obj->klass_ 6151 GenerateReferenceLoadTwoRegisters(instruction, 6152 temp_loc, 6153 obj_loc, 6154 class_offset, 6155 kWithoutReadBarrier); 6156 if (cls.IsRegister()) { 6157 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6158 } else { 6159 DCHECK(cls.IsStackSlot()) << cls; 6160 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6161 } 6162 // Jump to slow path for throwing the exception or doing a 6163 // more involved array check. 6164 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6165 break; 6166 } 6167 6168 case TypeCheckKind::kAbstractClassCheck: { 6169 // /* HeapReference<Class> */ temp = obj->klass_ 6170 GenerateReferenceLoadTwoRegisters(instruction, 6171 temp_loc, 6172 obj_loc, 6173 class_offset, 6174 kWithoutReadBarrier); 6175 // If the class is abstract, we eagerly fetch the super class of the 6176 // object to avoid doing a comparison we know will fail. 6177 NearLabel loop; 6178 __ Bind(&loop); 6179 // /* HeapReference<Class> */ temp = temp->super_class_ 6180 GenerateReferenceLoadOneRegister(instruction, 6181 temp_loc, 6182 super_offset, 6183 maybe_temp2_loc, 6184 kWithoutReadBarrier); 6185 6186 // If the class reference currently in `temp` is null, jump to the slow path to throw the 6187 // exception. 6188 __ testl(temp, temp); 6189 // Otherwise, compare the classes. 6190 __ j(kZero, type_check_slow_path->GetEntryLabel()); 6191 if (cls.IsRegister()) { 6192 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6193 } else { 6194 DCHECK(cls.IsStackSlot()) << cls; 6195 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6196 } 6197 __ j(kNotEqual, &loop); 6198 break; 6199 } 6200 6201 case TypeCheckKind::kClassHierarchyCheck: { 6202 // /* HeapReference<Class> */ temp = obj->klass_ 6203 GenerateReferenceLoadTwoRegisters(instruction, 6204 temp_loc, 6205 obj_loc, 6206 class_offset, 6207 kWithoutReadBarrier); 6208 // Walk over the class hierarchy to find a match. 6209 NearLabel loop; 6210 __ Bind(&loop); 6211 if (cls.IsRegister()) { 6212 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6213 } else { 6214 DCHECK(cls.IsStackSlot()) << cls; 6215 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6216 } 6217 __ j(kEqual, &done); 6218 6219 // /* HeapReference<Class> */ temp = temp->super_class_ 6220 GenerateReferenceLoadOneRegister(instruction, 6221 temp_loc, 6222 super_offset, 6223 maybe_temp2_loc, 6224 kWithoutReadBarrier); 6225 6226 // If the class reference currently in `temp` is not null, jump 6227 // back at the beginning of the loop. 6228 __ testl(temp, temp); 6229 __ j(kNotZero, &loop); 6230 // Otherwise, jump to the slow path to throw the exception. 6231 __ jmp(type_check_slow_path->GetEntryLabel()); 6232 break; 6233 } 6234 6235 case TypeCheckKind::kArrayObjectCheck: { 6236 // /* HeapReference<Class> */ temp = obj->klass_ 6237 GenerateReferenceLoadTwoRegisters(instruction, 6238 temp_loc, 6239 obj_loc, 6240 class_offset, 6241 kWithoutReadBarrier); 6242 // Do an exact check. 
6243 NearLabel check_non_primitive_component_type; 6244 if (cls.IsRegister()) { 6245 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6246 } else { 6247 DCHECK(cls.IsStackSlot()) << cls; 6248 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6249 } 6250 __ j(kEqual, &done); 6251 6252 // Otherwise, we need to check that the object's class is a non-primitive array. 6253 // /* HeapReference<Class> */ temp = temp->component_type_ 6254 GenerateReferenceLoadOneRegister(instruction, 6255 temp_loc, 6256 component_offset, 6257 maybe_temp2_loc, 6258 kWithoutReadBarrier); 6259 6260 // If the component type is not null (i.e. the object is indeed 6261 // an array), jump to label `check_non_primitive_component_type` 6262 // to further check that this component type is not a primitive 6263 // type. 6264 __ testl(temp, temp); 6265 // Otherwise, jump to the slow path to throw the exception. 6266 __ j(kZero, type_check_slow_path->GetEntryLabel()); 6267 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); 6268 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6269 break; 6270 } 6271 6272 case TypeCheckKind::kUnresolvedCheck: { 6273 // We always go into the type check slow path for the unresolved case. 6274 // 6275 // We cannot directly call the CheckCast runtime entry point 6276 // without resorting to a type checking slow path here (i.e. by 6277 // calling InvokeRuntime directly), as it would require to 6278 // assign fixed registers for the inputs of this HInstanceOf 6279 // instruction (following the runtime calling convention), which 6280 // might be cluttered by the potential first read barrier 6281 // emission at the beginning of this method. 6282 __ jmp(type_check_slow_path->GetEntryLabel()); 6283 break; 6284 } 6285 6286 case TypeCheckKind::kInterfaceCheck: 6287 // Fast path for the interface check. Try to avoid read barriers to improve the fast path. 6288 // We can not get false positives by doing this. 6289 // /* HeapReference<Class> */ temp = obj->klass_ 6290 GenerateReferenceLoadTwoRegisters(instruction, 6291 temp_loc, 6292 obj_loc, 6293 class_offset, 6294 kWithoutReadBarrier); 6295 6296 // /* HeapReference<Class> */ temp = temp->iftable_ 6297 GenerateReferenceLoadTwoRegisters(instruction, 6298 temp_loc, 6299 temp_loc, 6300 iftable_offset, 6301 kWithoutReadBarrier); 6302 // Iftable is never null. 6303 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); 6304 // Maybe poison the `cls` for direct comparison with memory. 6305 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>()); 6306 // Loop through the iftable and check if any class matches. 6307 NearLabel start_loop; 6308 __ Bind(&start_loop); 6309 // Need to subtract first to handle the empty array case. 6310 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); 6311 __ j(kNegative, type_check_slow_path->GetEntryLabel()); 6312 // Go to next interface if the classes do not match. 6313 __ cmpl(cls.AsRegister<CpuRegister>(), 6314 CodeGeneratorX86_64::ArrayAddress(temp, 6315 maybe_temp2_loc, 6316 TIMES_4, 6317 object_array_data_offset)); 6318 __ j(kNotEqual, &start_loop); // Return if same class. 6319 // If `cls` was poisoned above, unpoison it. 
6320 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); 6321 break; 6322 } 6323 6324 if (done.IsLinked()) { 6325 __ Bind(&done); 6326 } 6327 6328 __ Bind(type_check_slow_path->GetExitLabel()); 6329 } 6330 6331 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { 6332 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6333 instruction, LocationSummary::kCallOnMainOnly); 6334 InvokeRuntimeCallingConvention calling_convention; 6335 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 6336 } 6337 6338 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { 6339 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject, 6340 instruction, 6341 instruction->GetDexPc()); 6342 if (instruction->IsEnter()) { 6343 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 6344 } else { 6345 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 6346 } 6347 } 6348 6349 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } 6350 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } 6351 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } 6352 6353 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { 6354 LocationSummary* locations = 6355 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 6356 DCHECK(instruction->GetResultType() == DataType::Type::kInt32 6357 || instruction->GetResultType() == DataType::Type::kInt64); 6358 locations->SetInAt(0, Location::RequiresRegister()); 6359 locations->SetInAt(1, Location::Any()); 6360 locations->SetOut(Location::SameAsFirstInput()); 6361 } 6362 6363 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) { 6364 HandleBitwiseOperation(instruction); 6365 } 6366 6367 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) { 6368 HandleBitwiseOperation(instruction); 6369 } 6370 6371 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) { 6372 HandleBitwiseOperation(instruction); 6373 } 6374 6375 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { 6376 LocationSummary* locations = instruction->GetLocations(); 6377 Location first = locations->InAt(0); 6378 Location second = locations->InAt(1); 6379 DCHECK(first.Equals(locations->Out())); 6380 6381 if (instruction->GetResultType() == DataType::Type::kInt32) { 6382 if (second.IsRegister()) { 6383 if (instruction->IsAnd()) { 6384 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6385 } else if (instruction->IsOr()) { 6386 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6387 } else { 6388 DCHECK(instruction->IsXor()); 6389 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6390 } 6391 } else if (second.IsConstant()) { 6392 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); 6393 if (instruction->IsAnd()) { 6394 __ andl(first.AsRegister<CpuRegister>(), imm); 6395 } else if (instruction->IsOr()) { 6396 __ orl(first.AsRegister<CpuRegister>(), imm); 6397 } else { 6398 DCHECK(instruction->IsXor()); 6399 __ xorl(first.AsRegister<CpuRegister>(), imm); 6400 } 6401 } else { 6402 Address address(CpuRegister(RSP), second.GetStackIndex()); 6403 if (instruction->IsAnd()) { 6404 __ 
andl(first.AsRegister<CpuRegister>(), address); 6405 } else if (instruction->IsOr()) { 6406 __ orl(first.AsRegister<CpuRegister>(), address); 6407 } else { 6408 DCHECK(instruction->IsXor()); 6409 __ xorl(first.AsRegister<CpuRegister>(), address); 6410 } 6411 } 6412 } else { 6413 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 6414 CpuRegister first_reg = first.AsRegister<CpuRegister>(); 6415 bool second_is_constant = false; 6416 int64_t value = 0; 6417 if (second.IsConstant()) { 6418 second_is_constant = true; 6419 value = second.GetConstant()->AsLongConstant()->GetValue(); 6420 } 6421 bool is_int32_value = IsInt<32>(value); 6422 6423 if (instruction->IsAnd()) { 6424 if (second_is_constant) { 6425 if (is_int32_value) { 6426 __ andq(first_reg, Immediate(static_cast<int32_t>(value))); 6427 } else { 6428 __ andq(first_reg, codegen_->LiteralInt64Address(value)); 6429 } 6430 } else if (second.IsDoubleStackSlot()) { 6431 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6432 } else { 6433 __ andq(first_reg, second.AsRegister<CpuRegister>()); 6434 } 6435 } else if (instruction->IsOr()) { 6436 if (second_is_constant) { 6437 if (is_int32_value) { 6438 __ orq(first_reg, Immediate(static_cast<int32_t>(value))); 6439 } else { 6440 __ orq(first_reg, codegen_->LiteralInt64Address(value)); 6441 } 6442 } else if (second.IsDoubleStackSlot()) { 6443 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6444 } else { 6445 __ orq(first_reg, second.AsRegister<CpuRegister>()); 6446 } 6447 } else { 6448 DCHECK(instruction->IsXor()); 6449 if (second_is_constant) { 6450 if (is_int32_value) { 6451 __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); 6452 } else { 6453 __ xorq(first_reg, codegen_->LiteralInt64Address(value)); 6454 } 6455 } else if (second.IsDoubleStackSlot()) { 6456 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6457 } else { 6458 __ xorq(first_reg, second.AsRegister<CpuRegister>()); 6459 } 6460 } 6461 } 6462 } 6463 6464 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( 6465 HInstruction* instruction, 6466 Location out, 6467 uint32_t offset, 6468 Location maybe_temp, 6469 ReadBarrierOption read_barrier_option) { 6470 CpuRegister out_reg = out.AsRegister<CpuRegister>(); 6471 if (read_barrier_option == kWithReadBarrier) { 6472 CHECK(kEmitCompilerReadBarrier); 6473 if (kUseBakerReadBarrier) { 6474 // Load with fast path based Baker's read barrier. 6475 // /* HeapReference<Object> */ out = *(out + offset) 6476 codegen_->GenerateFieldLoadWithBakerReadBarrier( 6477 instruction, out, out_reg, offset, /* needs_null_check */ false); 6478 } else { 6479 // Load with slow path based read barrier. 6480 // Save the value of `out` into `maybe_temp` before overwriting it 6481 // in the following move operation, as we will need it for the 6482 // read barrier below. 6483 DCHECK(maybe_temp.IsRegister()) << maybe_temp; 6484 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg); 6485 // /* HeapReference<Object> */ out = *(out + offset) 6486 __ movl(out_reg, Address(out_reg, offset)); 6487 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); 6488 } 6489 } else { 6490 // Plain load with no read barrier. 
6491 // /* HeapReference<Object> */ out = *(out + offset) 6492 __ movl(out_reg, Address(out_reg, offset)); 6493 __ MaybeUnpoisonHeapReference(out_reg); 6494 } 6495 } 6496 6497 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters( 6498 HInstruction* instruction, 6499 Location out, 6500 Location obj, 6501 uint32_t offset, 6502 ReadBarrierOption read_barrier_option) { 6503 CpuRegister out_reg = out.AsRegister<CpuRegister>(); 6504 CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); 6505 if (read_barrier_option == kWithReadBarrier) { 6506 CHECK(kEmitCompilerReadBarrier); 6507 if (kUseBakerReadBarrier) { 6508 // Load with fast path based Baker's read barrier. 6509 // /* HeapReference<Object> */ out = *(obj + offset) 6510 codegen_->GenerateFieldLoadWithBakerReadBarrier( 6511 instruction, out, obj_reg, offset, /* needs_null_check */ false); 6512 } else { 6513 // Load with slow path based read barrier. 6514 // /* HeapReference<Object> */ out = *(obj + offset) 6515 __ movl(out_reg, Address(obj_reg, offset)); 6516 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); 6517 } 6518 } else { 6519 // Plain load with no read barrier. 6520 // /* HeapReference<Object> */ out = *(obj + offset) 6521 __ movl(out_reg, Address(obj_reg, offset)); 6522 __ MaybeUnpoisonHeapReference(out_reg); 6523 } 6524 } 6525 6526 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( 6527 HInstruction* instruction, 6528 Location root, 6529 const Address& address, 6530 Label* fixup_label, 6531 ReadBarrierOption read_barrier_option) { 6532 CpuRegister root_reg = root.AsRegister<CpuRegister>(); 6533 if (read_barrier_option == kWithReadBarrier) { 6534 DCHECK(kEmitCompilerReadBarrier); 6535 if (kUseBakerReadBarrier) { 6536 // Fast path implementation of art::ReadBarrier::BarrierForRoot when 6537 // Baker's read barrier are used: 6538 // 6539 // root = obj.field; 6540 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() 6541 // if (temp != null) { 6542 // root = temp(root) 6543 // } 6544 6545 // /* GcRoot<mirror::Object> */ root = *address 6546 __ movl(root_reg, address); 6547 if (fixup_label != nullptr) { 6548 __ Bind(fixup_label); 6549 } 6550 static_assert( 6551 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), 6552 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " 6553 "have different sizes."); 6554 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), 6555 "art::mirror::CompressedReference<mirror::Object> and int32_t " 6556 "have different sizes."); 6557 6558 // Slow path marking the GC root `root`. 6559 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( 6560 instruction, root, /* unpoison_ref_before_marking */ false); 6561 codegen_->AddSlowPath(slow_path); 6562 6563 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint. 6564 const int32_t entry_point_offset = 6565 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg()); 6566 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0)); 6567 // The entrypoint is null when the GC is not marking. 6568 __ j(kNotEqual, slow_path->GetEntryLabel()); 6569 __ Bind(slow_path->GetExitLabel()); 6570 } else { 6571 // GC root loaded through a slow path for read barriers other 6572 // than Baker's. 
6573 // /* GcRoot<mirror::Object>* */ root = address 6574 __ leaq(root_reg, address); 6575 if (fixup_label != nullptr) { 6576 __ Bind(fixup_label); 6577 } 6578 // /* mirror::Object* */ root = root->Read() 6579 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); 6580 } 6581 } else { 6582 // Plain GC root load with no read barrier. 6583 // /* GcRoot<mirror::Object> */ root = *address 6584 __ movl(root_reg, address); 6585 if (fixup_label != nullptr) { 6586 __ Bind(fixup_label); 6587 } 6588 // Note that GC roots are not affected by heap poisoning, thus we 6589 // do not have to unpoison `root_reg` here. 6590 } 6591 } 6592 6593 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 6594 Location ref, 6595 CpuRegister obj, 6596 uint32_t offset, 6597 bool needs_null_check) { 6598 DCHECK(kEmitCompilerReadBarrier); 6599 DCHECK(kUseBakerReadBarrier); 6600 6601 // /* HeapReference<Object> */ ref = *(obj + offset) 6602 Address src(obj, offset); 6603 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 6604 } 6605 6606 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, 6607 Location ref, 6608 CpuRegister obj, 6609 uint32_t data_offset, 6610 Location index, 6611 bool needs_null_check) { 6612 DCHECK(kEmitCompilerReadBarrier); 6613 DCHECK(kUseBakerReadBarrier); 6614 6615 static_assert( 6616 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 6617 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 6618 // /* HeapReference<Object> */ ref = 6619 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 6620 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset); 6621 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 6622 } 6623 6624 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, 6625 Location ref, 6626 CpuRegister obj, 6627 const Address& src, 6628 bool needs_null_check, 6629 bool always_update_field, 6630 CpuRegister* temp1, 6631 CpuRegister* temp2) { 6632 DCHECK(kEmitCompilerReadBarrier); 6633 DCHECK(kUseBakerReadBarrier); 6634 6635 // In slow path based read barriers, the read barrier call is 6636 // inserted after the original load. However, in fast path based 6637 // Baker's read barriers, we need to perform the load of 6638 // mirror::Object::monitor_ *before* the original reference load. 6639 // This load-load ordering is required by the read barrier. 6640 // The fast path/slow path (for Baker's algorithm) should look like: 6641 // 6642 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); 6643 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 6644 // HeapReference<Object> ref = *src; // Original reference load. 6645 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 6646 // if (is_gray) { 6647 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. 6648 // } 6649 // 6650 // Note: the original implementation in ReadBarrier::Barrier is 6651 // slightly more complex as: 6652 // - it implements the load-load fence using a data dependency on 6653 // the high-bits of rb_state, which are expected to be all zeroes 6654 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead 6655 // here, which is a no-op thanks to the x86-64 memory model); 6656 // - it performs additional checks that we do not do here for 6657 // performance reasons. 
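  // Worked example of the gray-bit arithmetic used below (illustrative numbers only; the real
  // shift value comes from lock_word.h): if kReadBarrierStateShift were 28, then
  // gray_byte_position would be 28 / 8 == 3 and gray_bit_position would be 28 % 8 == 4, so the
  // testb further down would test the mask 0x10 (1 << 4) in byte 3 of the 32-bit monitor word,
  // i.e. at address obj + monitor_offset + 3.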
6658 6659 CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); 6660 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 6661 6662 // Given the numeric representation, it's enough to check the low bit of the rb_state. 6663 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); 6664 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 6665 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; 6666 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; 6667 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); 6668 6669 // if (rb_state == ReadBarrier::GrayState()) 6670 // ref = ReadBarrier::Mark(ref); 6671 // At this point, just do the "if" and make sure that flags are preserved until the branch. 6672 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); 6673 if (needs_null_check) { 6674 MaybeRecordImplicitNullCheck(instruction); 6675 } 6676 6677 // Load fence to prevent load-load reordering. 6678 // Note that this is a no-op, thanks to the x86-64 memory model. 6679 GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 6680 6681 // The actual reference load. 6682 // /* HeapReference<Object> */ ref = *src 6683 __ movl(ref_reg, src); // Flags are unaffected. 6684 6685 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. 6686 // Slow path marking the object `ref` when it is gray. 6687 SlowPathCode* slow_path; 6688 if (always_update_field) { 6689 DCHECK(temp1 != nullptr); 6690 DCHECK(temp2 != nullptr); 6691 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( 6692 instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); 6693 } else { 6694 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( 6695 instruction, ref, /* unpoison_ref_before_marking */ true); 6696 } 6697 AddSlowPath(slow_path); 6698 6699 // We have done the "if" of the gray bit check above, now branch based on the flags. 6700 __ j(kNotZero, slow_path->GetEntryLabel()); 6701 6702 // Object* ref = ref_addr->AsMirrorPtr() 6703 __ MaybeUnpoisonHeapReference(ref_reg); 6704 6705 __ Bind(slow_path->GetExitLabel()); 6706 } 6707 6708 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, 6709 Location out, 6710 Location ref, 6711 Location obj, 6712 uint32_t offset, 6713 Location index) { 6714 DCHECK(kEmitCompilerReadBarrier); 6715 6716 // Insert a slow path based read barrier *after* the reference load. 6717 // 6718 // If heap poisoning is enabled, the unpoisoning of the loaded 6719 // reference will be carried out by the runtime within the slow 6720 // path. 6721 // 6722 // Note that `ref` currently does not get unpoisoned (when heap 6723 // poisoning is enabled), which is alright as the `ref` argument is 6724 // not used by the artReadBarrierSlow entry point. 6725 // 6726 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 
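  // Note that, unlike the Baker fast path above, the code emitted here enters the slow path
  // unconditionally: with slow-path-based (non-Baker) read barriers, every such reference load
  // goes through the runtime.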
6727 SlowPathCode* slow_path = new (GetScopedAllocator()) 6728 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); 6729 AddSlowPath(slow_path); 6730 6731 __ jmp(slow_path->GetEntryLabel()); 6732 __ Bind(slow_path->GetExitLabel()); 6733 } 6734 6735 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, 6736 Location out, 6737 Location ref, 6738 Location obj, 6739 uint32_t offset, 6740 Location index) { 6741 if (kEmitCompilerReadBarrier) { 6742 // Baker's read barriers shall be handled by the fast path 6743 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). 6744 DCHECK(!kUseBakerReadBarrier); 6745 // If heap poisoning is enabled, unpoisoning will be taken care of 6746 // by the runtime within the slow path. 6747 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); 6748 } else if (kPoisonHeapReferences) { 6749 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); 6750 } 6751 } 6752 6753 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, 6754 Location out, 6755 Location root) { 6756 DCHECK(kEmitCompilerReadBarrier); 6757 6758 // Insert a slow path based read barrier *after* the GC root load. 6759 // 6760 // Note that GC roots are not affected by heap poisoning, so we do 6761 // not need to do anything special for this here. 6762 SlowPathCode* slow_path = 6763 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); 6764 AddSlowPath(slow_path); 6765 6766 __ jmp(slow_path->GetEntryLabel()); 6767 __ Bind(slow_path->GetExitLabel()); 6768 } 6769 6770 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 6771 // Nothing to do, this should be removed during prepare for register allocator. 6772 LOG(FATAL) << "Unreachable"; 6773 } 6774 6775 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 6776 // Nothing to do, this should be removed during prepare for register allocator. 6777 LOG(FATAL) << "Unreachable"; 6778 } 6779 6780 // Simple implementation of packed switch - generate cascaded compare/jumps. 6781 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 6782 LocationSummary* locations = 6783 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); 6784 locations->SetInAt(0, Location::RequiresRegister()); 6785 locations->AddTemp(Location::RequiresRegister()); 6786 locations->AddTemp(Location::RequiresRegister()); 6787 } 6788 6789 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 6790 int32_t lower_bound = switch_instr->GetStartValue(); 6791 uint32_t num_entries = switch_instr->GetNumEntries(); 6792 LocationSummary* locations = switch_instr->GetLocations(); 6793 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); 6794 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); 6795 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); 6796 HBasicBlock* default_block = switch_instr->GetDefaultBlock(); 6797 6798 // Should we generate smaller inline compare/jumps? 6799 if (num_entries <= kPackedSwitchJumpTableThreshold) { 6800 // Figure out the correct compare values and jump conditions. 6801 // Handle the first compare/branch as a special case because it might 6802 // jump to the default case. 
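  // Sketch of the emitted cascade (illustrative, not the literal output) for lower_bound == 1 and
  // num_entries == 3, i.e. cases {1, 2, 3}:
  //   cmpl $1, value ; jl default ; je case_0    <- the special-cased first compare/branch
  //   cmpl $3, value ; jl case_1  ; je case_2    <- one loop iteration below covers two cases
  //   jmp default                                <- unless the default block is the fall-through
  // With lower_bound == 0 the first compare is skipped and the unsigned condition kBelow is used
  // instead of kLess, so negative input values fall through to the default block.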
6803 DCHECK_GT(num_entries, 2u); 6804 Condition first_condition; 6805 uint32_t index; 6806 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); 6807 if (lower_bound != 0) { 6808 first_condition = kLess; 6809 __ cmpl(value_reg_in, Immediate(lower_bound)); 6810 __ j(first_condition, codegen_->GetLabelOf(default_block)); 6811 __ j(kEqual, codegen_->GetLabelOf(successors[0])); 6812 6813 index = 1; 6814 } else { 6815 // Handle all the compare/jumps below. 6816 first_condition = kBelow; 6817 index = 0; 6818 } 6819 6820 // Handle the rest of the compare/jumps. 6821 for (; index + 1 < num_entries; index += 2) { 6822 int32_t compare_to_value = lower_bound + index + 1; 6823 __ cmpl(value_reg_in, Immediate(compare_to_value)); 6824 // Jump to successors[index] if value < case_value[index]. 6825 __ j(first_condition, codegen_->GetLabelOf(successors[index])); 6826 // Jump to successors[index + 1] if value == case_value[index + 1]. 6827 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); 6828 } 6829 6830 if (index != num_entries) { 6831 // There are an odd number of entries. Handle the last one. 6832 DCHECK_EQ(index + 1, num_entries); 6833 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index))); 6834 __ j(kEqual, codegen_->GetLabelOf(successors[index])); 6835 } 6836 6837 // And the default for any other value. 6838 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { 6839 __ jmp(codegen_->GetLabelOf(default_block)); 6840 } 6841 return; 6842 } 6843 6844 // Remove the bias, if needed. 6845 Register value_reg_out = value_reg_in.AsRegister(); 6846 if (lower_bound != 0) { 6847 __ leal(temp_reg, Address(value_reg_in, -lower_bound)); 6848 value_reg_out = temp_reg.AsRegister(); 6849 } 6850 CpuRegister value_reg(value_reg_out); 6851 6852 // Is the value in range? 6853 __ cmpl(value_reg, Immediate(num_entries - 1)); 6854 __ j(kAbove, codegen_->GetLabelOf(default_block)); 6855 6856 // We are in the range of the table. 6857 // Load the address of the jump table in the constant area. 6858 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr)); 6859 6860 // Load the (signed) offset from the jump table. 6861 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0)); 6862 6863 // Add the offset to the address of the table base. 6864 __ addq(temp_reg, base_reg); 6865 6866 // And jump. 6867 __ jmp(temp_reg); 6868 } 6869 6870 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction 6871 ATTRIBUTE_UNUSED) { 6872 LOG(FATAL) << "Unreachable"; 6873 } 6874 6875 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction 6876 ATTRIBUTE_UNUSED) { 6877 LOG(FATAL) << "Unreachable"; 6878 } 6879 6880 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) { 6881 if (value == 0) { 6882 __ xorl(dest, dest); 6883 } else { 6884 __ movl(dest, Immediate(value)); 6885 } 6886 } 6887 6888 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { 6889 if (value == 0) { 6890 // Clears upper bits too. 6891 __ xorl(dest, dest); 6892 } else if (IsUint<32>(value)) { 6893 // We can use a 32 bit move, as it will zero-extend and is shorter. 
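  // (Any write to a 32-bit register on x86-64 clears bits 63..32 of the full register, so for
  // values that fit in unsigned 32 bits the movl below produces the same 64-bit result as a movq
  // with a 64-bit immediate, while using a shorter encoding.)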
6894 __ movl(dest, Immediate(static_cast<int32_t>(value))); 6895 } else { 6896 __ movq(dest, Immediate(value)); 6897 } 6898 } 6899 6900 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) { 6901 if (value == 0) { 6902 __ xorps(dest, dest); 6903 } else { 6904 __ movss(dest, LiteralInt32Address(value)); 6905 } 6906 } 6907 6908 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) { 6909 if (value == 0) { 6910 __ xorpd(dest, dest); 6911 } else { 6912 __ movsd(dest, LiteralInt64Address(value)); 6913 } 6914 } 6915 6916 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) { 6917 Load32BitValue(dest, bit_cast<int32_t, float>(value)); 6918 } 6919 6920 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) { 6921 Load64BitValue(dest, bit_cast<int64_t, double>(value)); 6922 } 6923 6924 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) { 6925 if (value == 0) { 6926 __ testl(dest, dest); 6927 } else { 6928 __ cmpl(dest, Immediate(value)); 6929 } 6930 } 6931 6932 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) { 6933 if (IsInt<32>(value)) { 6934 if (value == 0) { 6935 __ testq(dest, dest); 6936 } else { 6937 __ cmpq(dest, Immediate(static_cast<int32_t>(value))); 6938 } 6939 } else { 6940 // Value won't fit in an int. 6941 __ cmpq(dest, LiteralInt64Address(value)); 6942 } 6943 } 6944 6945 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) { 6946 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); 6947 GenerateIntCompare(lhs_reg, rhs); 6948 } 6949 6950 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) { 6951 if (rhs.IsConstant()) { 6952 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); 6953 Compare32BitValue(lhs, value); 6954 } else if (rhs.IsStackSlot()) { 6955 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex())); 6956 } else { 6957 __ cmpl(lhs, rhs.AsRegister<CpuRegister>()); 6958 } 6959 } 6960 6961 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) { 6962 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); 6963 if (rhs.IsConstant()) { 6964 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue(); 6965 Compare64BitValue(lhs_reg, value); 6966 } else if (rhs.IsDoubleStackSlot()) { 6967 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 6968 } else { 6969 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>()); 6970 } 6971 } 6972 6973 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj, 6974 Location index, 6975 ScaleFactor scale, 6976 uint32_t data_offset) { 6977 return index.IsConstant() ? 6978 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : 6979 Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); 6980 } 6981 6982 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { 6983 DCHECK(dest.IsDoubleStackSlot()); 6984 if (IsInt<32>(value)) { 6985 // Can move directly as an int32 constant. 6986 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), 6987 Immediate(static_cast<int32_t>(value))); 6988 } else { 6989 Load64BitValue(CpuRegister(TMP), value); 6990 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP)); 6991 } 6992 } 6993 6994 /** 6995 * Class to handle late fixup of offsets into constant area. 
6996 */ 6997 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { 6998 public: 6999 RIPFixup(CodeGeneratorX86_64& codegen, size_t offset) 7000 : codegen_(&codegen), offset_into_constant_area_(offset) {} 7001 7002 protected: 7003 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; } 7004 7005 CodeGeneratorX86_64* codegen_; 7006 7007 private: 7008 void Process(const MemoryRegion& region, int pos) OVERRIDE { 7009 // Patch the correct offset for the instruction. We use the address of the 7010 // 'next' instruction, which is 'pos' (patch the 4 bytes before). 7011 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_; 7012 int32_t relative_position = constant_offset - pos; 7013 7014 // Patch in the right value. 7015 region.StoreUnaligned<int32_t>(pos - 4, relative_position); 7016 } 7017 7018 // Location in constant area that the fixup refers to. 7019 size_t offset_into_constant_area_; 7020 }; 7021 7022 /** 7023 * Class to handle late fixup of offsets to a jump table that will be created in the 7024 * constant area. 7025 */ 7026 class JumpTableRIPFixup : public RIPFixup { 7027 public: 7028 JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr) 7029 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {} 7030 7031 void CreateJumpTable() { 7032 X86_64Assembler* assembler = codegen_->GetAssembler(); 7033 7034 // Ensure that the reference to the jump table has the correct offset. 7035 const int32_t offset_in_constant_table = assembler->ConstantAreaSize(); 7036 SetOffset(offset_in_constant_table); 7037 7038 // Compute the offset from the start of the function to this jump table. 7039 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table; 7040 7041 // Populate the jump table with the correct values. 7042 int32_t num_entries = switch_instr_->GetNumEntries(); 7043 HBasicBlock* block = switch_instr_->GetBlock(); 7044 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors(); 7045 // The value that we want is the target offset - the position of the table. 7046 for (int32_t i = 0; i < num_entries; i++) { 7047 HBasicBlock* b = successors[i]; 7048 Label* l = codegen_->GetLabelOf(b); 7049 DCHECK(l->IsBound()); 7050 int32_t offset_to_block = l->Position() - current_table_offset; 7051 assembler->AppendInt32(offset_to_block); 7052 } 7053 } 7054 7055 private: 7056 const HPackedSwitch* switch_instr_; 7057 }; 7058 7059 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { 7060 // Generate the constant area if needed. 7061 X86_64Assembler* assembler = GetAssembler(); 7062 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { 7063 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values. 7064 assembler->Align(4, 0); 7065 constant_area_start_ = assembler->CodeSize(); 7066 7067 // Populate any jump tables. 7068 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) { 7069 jump_table->CreateJumpTable(); 7070 } 7071 7072 // And now add the constant area to the generated code. 7073 assembler->AddConstantArea(); 7074 } 7075 7076 // And finish up.
7077 CodeGenerator::Finalize(allocator); 7078 } 7079 7080 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { 7081 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v)); 7082 return Address::RIP(fixup); 7083 } 7084 7085 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) { 7086 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v)); 7087 return Address::RIP(fixup); 7088 } 7089 7090 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) { 7091 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v)); 7092 return Address::RIP(fixup); 7093 } 7094 7095 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { 7096 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v)); 7097 return Address::RIP(fixup); 7098 } 7099 7100 // TODO: trg as memory. 7101 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) { 7102 if (!trg.IsValid()) { 7103 DCHECK_EQ(type, DataType::Type::kVoid); 7104 return; 7105 } 7106 7107 DCHECK_NE(type, DataType::Type::kVoid); 7108 7109 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type); 7110 if (trg.Equals(return_loc)) { 7111 return; 7112 } 7113 7114 // Let the parallel move resolver take care of all of this. 7115 HParallelMove parallel_move(GetGraph()->GetAllocator()); 7116 parallel_move.AddMove(return_loc, trg, type, nullptr); 7117 GetMoveResolver()->EmitNativeCode(&parallel_move); 7118 } 7119 7120 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) { 7121 // Create a fixup to be used to create and address the jump table. 7122 JumpTableRIPFixup* table_fixup = 7123 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr); 7124 7125 // We have to populate the jump tables. 7126 fixups_to_jump_tables_.push_back(table_fixup); 7127 return Address::RIP(table_fixup); 7128 } 7129 7130 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low, 7131 const Address& addr_high, 7132 int64_t v, 7133 HInstruction* instruction) { 7134 if (IsInt<32>(v)) { 7135 int32_t v_32 = v; 7136 __ movq(addr_low, Immediate(v_32)); 7137 MaybeRecordImplicitNullCheck(instruction); 7138 } else { 7139 // The value does not fit in a 32-bit immediate. Store it in two 32-bit pieces.
7140 int32_t low_v = Low32Bits(v); 7141 int32_t high_v = High32Bits(v); 7142 __ movl(addr_low, Immediate(low_v)); 7143 MaybeRecordImplicitNullCheck(instruction); 7144 __ movl(addr_high, Immediate(high_v)); 7145 } 7146 } 7147 7148 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, 7149 const uint8_t* roots_data, 7150 const PatchInfo<Label>& info, 7151 uint64_t index_in_table) const { 7152 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; 7153 uintptr_t address = 7154 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); 7155 typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; 7156 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = 7157 dchecked_integral_cast<uint32_t>(address); 7158 } 7159 7160 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { 7161 for (const PatchInfo<Label>& info : jit_string_patches_) { 7162 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index)); 7163 uint64_t index_in_table = GetJitStringRootIndex(string_reference); 7164 PatchJitRootUse(code, roots_data, info, index_in_table); 7165 } 7166 7167 for (const PatchInfo<Label>& info : jit_class_patches_) { 7168 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index)); 7169 uint64_t index_in_table = GetJitClassRootIndex(type_reference); 7170 PatchJitRootUse(code, roots_data, info, index_in_table); 7171 } 7172 } 7173 7174 #undef __ 7175 7176 } // namespace x86_64 7177 } // namespace art 7178