1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "art_method.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "intrinsics.h"
29 #include "intrinsics_x86.h"
30 #include "linker/linker_patch.h"
31 #include "lock_word.h"
32 #include "mirror/array-inl.h"
33 #include "mirror/class-inl.h"
34 #include "thread.h"
35 #include "utils/assembler.h"
36 #include "utils/stack_checks.h"
37 #include "utils/x86/assembler_x86.h"
38 #include "utils/x86/managed_register_x86.h"
39
40 namespace art {
41
42 template<class MirrorType>
43 class GcRoot;
44
45 namespace x86 {
46
47 static constexpr int kCurrentMethodStackOffset = 0;
48 static constexpr Register kMethodRegisterArgument = EAX;
49 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
50
51 static constexpr int kC2ConditionMask = 0x400;
52
53 static constexpr int kFakeReturnRegister = Register(8);
54
55 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
56 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
57
58 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
59 InvokeRuntimeCallingConvention calling_convention;
60 RegisterSet caller_saves = RegisterSet::Empty();
61 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
62 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
63 // that the kPrimNot result register is the same as the first argument register.
64 return caller_saves;
65 }
66
67 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
68 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
69 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
70
71 class NullCheckSlowPathX86 : public SlowPathCode {
72 public:
73 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
74
75 void EmitNativeCode(CodeGenerator* codegen) override {
76 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
77 __ Bind(GetEntryLabel());
78 if (instruction_->CanThrowIntoCatchBlock()) {
79 // Live registers will be restored in the catch block if caught.
80 SaveLiveRegisters(codegen, instruction_->GetLocations()); 81 } 82 x86_codegen->InvokeRuntime(kQuickThrowNullPointer, 83 instruction_, 84 instruction_->GetDexPc(), 85 this); 86 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); 87 } 88 89 bool IsFatal() const override { return true; } 90 91 const char* GetDescription() const override { return "NullCheckSlowPathX86"; } 92 93 private: 94 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86); 95 }; 96 97 class DivZeroCheckSlowPathX86 : public SlowPathCode { 98 public: 99 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} 100 101 void EmitNativeCode(CodeGenerator* codegen) override { 102 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 103 __ Bind(GetEntryLabel()); 104 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); 105 CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); 106 } 107 108 bool IsFatal() const override { return true; } 109 110 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; } 111 112 private: 113 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86); 114 }; 115 116 class DivRemMinusOneSlowPathX86 : public SlowPathCode { 117 public: 118 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div) 119 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {} 120 121 void EmitNativeCode(CodeGenerator* codegen) override { 122 __ Bind(GetEntryLabel()); 123 if (is_div_) { 124 __ negl(reg_); 125 } else { 126 __ movl(reg_, Immediate(0)); 127 } 128 __ jmp(GetExitLabel()); 129 } 130 131 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; } 132 133 private: 134 Register reg_; 135 bool is_div_; 136 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86); 137 }; 138 139 class BoundsCheckSlowPathX86 : public SlowPathCode { 140 public: 141 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {} 142 143 void EmitNativeCode(CodeGenerator* codegen) override { 144 LocationSummary* locations = instruction_->GetLocations(); 145 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 146 __ Bind(GetEntryLabel()); 147 // We're moving two locations to locations that could overlap, so we need a parallel 148 // move resolver. 149 if (instruction_->CanThrowIntoCatchBlock()) { 150 // Live registers will be restored in the catch block if caught. 151 SaveLiveRegisters(codegen, instruction_->GetLocations()); 152 } 153 154 // Are we using an array length from memory? 155 HInstruction* array_length = instruction_->InputAt(1); 156 Location length_loc = locations->InAt(1); 157 InvokeRuntimeCallingConvention calling_convention; 158 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) { 159 // Load the array length into our temporary. 160 HArrayLength* length = array_length->AsArrayLength(); 161 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length); 162 Location array_loc = array_length->GetLocations()->InAt(0); 163 Address array_len(array_loc.AsRegister<Register>(), len_offset); 164 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1)); 165 // Check for conflicts with index. 166 if (length_loc.Equals(locations->InAt(0))) { 167 // We know we aren't using parameter 2. 
168 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); 169 } 170 __ movl(length_loc.AsRegister<Register>(), array_len); 171 if (mirror::kUseStringCompression && length->IsStringLength()) { 172 __ shrl(length_loc.AsRegister<Register>(), Immediate(1)); 173 } 174 } 175 x86_codegen->EmitParallelMoves( 176 locations->InAt(0), 177 Location::RegisterLocation(calling_convention.GetRegisterAt(0)), 178 DataType::Type::kInt32, 179 length_loc, 180 Location::RegisterLocation(calling_convention.GetRegisterAt(1)), 181 DataType::Type::kInt32); 182 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() 183 ? kQuickThrowStringBounds 184 : kQuickThrowArrayBounds; 185 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); 186 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); 187 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); 188 } 189 190 bool IsFatal() const override { return true; } 191 192 const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; } 193 194 private: 195 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86); 196 }; 197 198 class SuspendCheckSlowPathX86 : public SlowPathCode { 199 public: 200 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) 201 : SlowPathCode(instruction), successor_(successor) {} 202 203 void EmitNativeCode(CodeGenerator* codegen) override { 204 LocationSummary* locations = instruction_->GetLocations(); 205 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 206 __ Bind(GetEntryLabel()); 207 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD. 208 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); 209 CheckEntrypointTypes<kQuickTestSuspend, void, void>(); 210 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD. 
211 if (successor_ == nullptr) { 212 __ jmp(GetReturnLabel()); 213 } else { 214 __ jmp(x86_codegen->GetLabelOf(successor_)); 215 } 216 } 217 218 Label* GetReturnLabel() { 219 DCHECK(successor_ == nullptr); 220 return &return_label_; 221 } 222 223 HBasicBlock* GetSuccessor() const { 224 return successor_; 225 } 226 227 const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; } 228 229 private: 230 HBasicBlock* const successor_; 231 Label return_label_; 232 233 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86); 234 }; 235 236 class LoadStringSlowPathX86 : public SlowPathCode { 237 public: 238 explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {} 239 240 void EmitNativeCode(CodeGenerator* codegen) override { 241 LocationSummary* locations = instruction_->GetLocations(); 242 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); 243 244 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 245 __ Bind(GetEntryLabel()); 246 SaveLiveRegisters(codegen, locations); 247 248 InvokeRuntimeCallingConvention calling_convention; 249 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); 250 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_)); 251 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); 252 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 253 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); 254 RestoreLiveRegisters(codegen, locations); 255 256 __ jmp(GetExitLabel()); 257 } 258 259 const char* GetDescription() const override { return "LoadStringSlowPathX86"; } 260 261 private: 262 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86); 263 }; 264 265 class LoadClassSlowPathX86 : public SlowPathCode { 266 public: 267 LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at) 268 : SlowPathCode(at), cls_(cls) { 269 DCHECK(at->IsLoadClass() || at->IsClinitCheck()); 270 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); 271 } 272 273 void EmitNativeCode(CodeGenerator* codegen) override { 274 LocationSummary* locations = instruction_->GetLocations(); 275 Location out = locations->Out(); 276 const uint32_t dex_pc = instruction_->GetDexPc(); 277 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); 278 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); 279 280 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 281 __ Bind(GetEntryLabel()); 282 SaveLiveRegisters(codegen, locations); 283 284 InvokeRuntimeCallingConvention calling_convention; 285 if (must_resolve_type) { 286 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile())); 287 dex::TypeIndex type_index = cls_->GetTypeIndex(); 288 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_)); 289 x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); 290 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); 291 // If we also must_do_clinit, the resolved type is now in the correct register. 292 } else { 293 DCHECK(must_do_clinit); 294 Location source = instruction_->IsLoadClass() ? 
out : locations->InAt(0); 295 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source); 296 } 297 if (must_do_clinit) { 298 x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); 299 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); 300 } 301 302 // Move the class to the desired location. 303 if (out.IsValid()) { 304 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); 305 x86_codegen->Move32(out, Location::RegisterLocation(EAX)); 306 } 307 RestoreLiveRegisters(codegen, locations); 308 __ jmp(GetExitLabel()); 309 } 310 311 const char* GetDescription() const override { return "LoadClassSlowPathX86"; } 312 313 private: 314 // The class this slow path will load. 315 HLoadClass* const cls_; 316 317 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86); 318 }; 319 320 class TypeCheckSlowPathX86 : public SlowPathCode { 321 public: 322 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal) 323 : SlowPathCode(instruction), is_fatal_(is_fatal) {} 324 325 void EmitNativeCode(CodeGenerator* codegen) override { 326 LocationSummary* locations = instruction_->GetLocations(); 327 DCHECK(instruction_->IsCheckCast() 328 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); 329 330 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 331 __ Bind(GetEntryLabel()); 332 333 if (kPoisonHeapReferences && 334 instruction_->IsCheckCast() && 335 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) { 336 // First, unpoison the `cls` reference that was poisoned for direct memory comparison. 337 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>()); 338 } 339 340 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { 341 SaveLiveRegisters(codegen, locations); 342 } 343 344 // We're moving two locations to locations that could overlap, so we need a parallel 345 // move resolver. 
346 InvokeRuntimeCallingConvention calling_convention;
347 x86_codegen->EmitParallelMoves(locations->InAt(0),
348 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
349 DataType::Type::kReference,
350 locations->InAt(1),
351 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
352 DataType::Type::kReference);
353 if (instruction_->IsInstanceOf()) {
354 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
355 instruction_,
356 instruction_->GetDexPc(),
357 this);
358 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
359 } else {
360 DCHECK(instruction_->IsCheckCast());
361 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
362 instruction_,
363 instruction_->GetDexPc(),
364 this);
365 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
366 }
367
368 if (!is_fatal_) {
369 if (instruction_->IsInstanceOf()) {
370 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
371 }
372 RestoreLiveRegisters(codegen, locations);
373
374 __ jmp(GetExitLabel());
375 }
376 }
377
378 const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
379 bool IsFatal() const override { return is_fatal_; }
380
381 private:
382 const bool is_fatal_;
383
384 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
385 };
386
387 class DeoptimizationSlowPathX86 : public SlowPathCode {
388 public:
389 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
390 : SlowPathCode(instruction) {}
391
392 void EmitNativeCode(CodeGenerator* codegen) override {
393 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
394 __ Bind(GetEntryLabel());
395 LocationSummary* locations = instruction_->GetLocations();
396 SaveLiveRegisters(codegen, locations);
397 InvokeRuntimeCallingConvention calling_convention;
398 x86_codegen->Load32BitValue(
399 calling_convention.GetRegisterAt(0),
400 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
401 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
402 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
403 }
404
405 const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
406
407 private:
408 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
409 };
410
411 class ArraySetSlowPathX86 : public SlowPathCode {
412 public:
413 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
414
415 void EmitNativeCode(CodeGenerator* codegen) override {
416 LocationSummary* locations = instruction_->GetLocations();
417 __ Bind(GetEntryLabel());
418 SaveLiveRegisters(codegen, locations);
419
420 InvokeRuntimeCallingConvention calling_convention;
421 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
422 parallel_move.AddMove(
423 locations->InAt(0),
424 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
425 DataType::Type::kReference,
426 nullptr);
427 parallel_move.AddMove(
428 locations->InAt(1),
429 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
430 DataType::Type::kInt32,
431 nullptr);
432 parallel_move.AddMove(
433 locations->InAt(2),
434 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
435 DataType::Type::kReference,
436 nullptr);
437 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
438
439 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
440 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_,
instruction_->GetDexPc(), this); 441 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); 442 RestoreLiveRegisters(codegen, locations); 443 __ jmp(GetExitLabel()); 444 } 445 446 const char* GetDescription() const override { return "ArraySetSlowPathX86"; } 447 448 private: 449 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); 450 }; 451 452 // Slow path marking an object reference `ref` during a read 453 // barrier. The field `obj.field` in the object `obj` holding this 454 // reference does not get updated by this slow path after marking (see 455 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that). 456 // 457 // This means that after the execution of this slow path, `ref` will 458 // always be up-to-date, but `obj.field` may not; i.e., after the 459 // flip, `ref` will be a to-space reference, but `obj.field` will 460 // probably still be a from-space reference (unless it gets updated by 461 // another thread, or if another thread installed another object 462 // reference (different from `ref`) in `obj.field`). 463 class ReadBarrierMarkSlowPathX86 : public SlowPathCode { 464 public: 465 ReadBarrierMarkSlowPathX86(HInstruction* instruction, 466 Location ref, 467 bool unpoison_ref_before_marking) 468 : SlowPathCode(instruction), 469 ref_(ref), 470 unpoison_ref_before_marking_(unpoison_ref_before_marking) { 471 DCHECK(kEmitCompilerReadBarrier); 472 } 473 474 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; } 475 476 void EmitNativeCode(CodeGenerator* codegen) override { 477 LocationSummary* locations = instruction_->GetLocations(); 478 Register ref_reg = ref_.AsRegister<Register>(); 479 DCHECK(locations->CanCall()); 480 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; 481 DCHECK(instruction_->IsInstanceFieldGet() || 482 instruction_->IsStaticFieldGet() || 483 instruction_->IsArrayGet() || 484 instruction_->IsArraySet() || 485 instruction_->IsLoadClass() || 486 instruction_->IsLoadString() || 487 instruction_->IsInstanceOf() || 488 instruction_->IsCheckCast() || 489 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || 490 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) 491 << "Unexpected instruction in read barrier marking slow path: " 492 << instruction_->DebugName(); 493 494 __ Bind(GetEntryLabel()); 495 if (unpoison_ref_before_marking_) { 496 // Object* ref = ref_addr->AsMirrorPtr() 497 __ MaybeUnpoisonHeapReference(ref_reg); 498 } 499 // No need to save live registers; it's taken care of by the 500 // entrypoint. Also, there is no need to update the stack mask, 501 // as this runtime call will not trigger a garbage collection. 502 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 503 DCHECK_NE(ref_reg, ESP); 504 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; 505 // "Compact" slow path, saving two moves. 506 // 507 // Instead of using the standard runtime calling convention (input 508 // and output in EAX): 509 // 510 // EAX <- ref 511 // EAX <- ReadBarrierMark(EAX) 512 // ref <- EAX 513 // 514 // we just use rX (the register containing `ref`) as input and output 515 // of a dedicated entrypoint: 516 // 517 // rX <- ReadBarrierMarkRegX(rX) 518 // 519 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); 520 // This runtime call does not require a stack map. 
521 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); 522 __ jmp(GetExitLabel()); 523 } 524 525 private: 526 // The location (register) of the marked object reference. 527 const Location ref_; 528 // Should the reference in `ref_` be unpoisoned prior to marking it? 529 const bool unpoison_ref_before_marking_; 530 531 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86); 532 }; 533 534 // Slow path marking an object reference `ref` during a read barrier, 535 // and if needed, atomically updating the field `obj.field` in the 536 // object `obj` holding this reference after marking (contrary to 537 // ReadBarrierMarkSlowPathX86 above, which never tries to update 538 // `obj.field`). 539 // 540 // This means that after the execution of this slow path, both `ref` 541 // and `obj.field` will be up-to-date; i.e., after the flip, both will 542 // hold the same to-space reference (unless another thread installed 543 // another object reference (different from `ref`) in `obj.field`). 544 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { 545 public: 546 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction, 547 Location ref, 548 Register obj, 549 const Address& field_addr, 550 bool unpoison_ref_before_marking, 551 Register temp) 552 : SlowPathCode(instruction), 553 ref_(ref), 554 obj_(obj), 555 field_addr_(field_addr), 556 unpoison_ref_before_marking_(unpoison_ref_before_marking), 557 temp_(temp) { 558 DCHECK(kEmitCompilerReadBarrier); 559 } 560 561 const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } 562 563 void EmitNativeCode(CodeGenerator* codegen) override { 564 LocationSummary* locations = instruction_->GetLocations(); 565 Register ref_reg = ref_.AsRegister<Register>(); 566 DCHECK(locations->CanCall()); 567 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; 568 // This slow path is only used by the UnsafeCASObject intrinsic. 569 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) 570 << "Unexpected instruction in read barrier marking and field updating slow path: " 571 << instruction_->DebugName(); 572 DCHECK(instruction_->GetLocations()->Intrinsified()); 573 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); 574 575 __ Bind(GetEntryLabel()); 576 if (unpoison_ref_before_marking_) { 577 // Object* ref = ref_addr->AsMirrorPtr() 578 __ MaybeUnpoisonHeapReference(ref_reg); 579 } 580 581 // Save the old (unpoisoned) reference. 582 __ movl(temp_, ref_reg); 583 584 // No need to save live registers; it's taken care of by the 585 // entrypoint. Also, there is no need to update the stack mask, 586 // as this runtime call will not trigger a garbage collection. 587 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 588 DCHECK_NE(ref_reg, ESP); 589 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; 590 // "Compact" slow path, saving two moves. 591 // 592 // Instead of using the standard runtime calling convention (input 593 // and output in EAX): 594 // 595 // EAX <- ref 596 // EAX <- ReadBarrierMark(EAX) 597 // ref <- EAX 598 // 599 // we just use rX (the register containing `ref`) as input and output 600 // of a dedicated entrypoint: 601 // 602 // rX <- ReadBarrierMarkRegX(rX) 603 // 604 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); 605 // This runtime call does not require a stack map. 
606 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
607
608 // If the new reference is different from the old reference,
609 // update the field in the holder (`*field_addr`).
610 //
611 // Note that this field could also hold a different object, if
612 // another thread had concurrently changed it. In that case, the
613 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
614 // operation below would abort the CAS, leaving the field as-is.
615 NearLabel done;
616 __ cmpl(temp_, ref_reg);
617 __ j(kEqual, &done);
618
619 // Update the holder's field atomically. This may fail if
620 // the mutator updates it before us, but it's OK. This is achieved
621 // using a strong compare-and-set (CAS) operation with relaxed
622 // memory synchronization ordering, where the expected value is
623 // the old reference and the desired value is the new reference.
624 // This operation is implemented with a 32-bit LOCK CMPXCHG
625 // instruction, which requires the expected value (the old
626 // reference) to be in EAX. Save EAX beforehand, and move the
627 // expected value (stored in `temp_`) into EAX.
628 __ pushl(EAX);
629 __ movl(EAX, temp_);
630
631 // Convenience aliases.
632 Register base = obj_;
633 Register expected = EAX;
634 Register value = ref_reg;
635
636 bool base_equals_value = (base == value);
637 if (kPoisonHeapReferences) {
638 if (base_equals_value) {
639 // If `base` and `value` are the same register location, move
640 // `value` to a temporary register. This way, poisoning
641 // `value` won't invalidate `base`.
642 value = temp_;
643 __ movl(value, base);
644 }
645
646 // Check that the register allocator did not assign the location
647 // of `expected` (EAX) to `value` nor to `base`, so that heap
648 // poisoning (when enabled) works as intended below.
649 // - If `value` were equal to `expected`, both references would
650 // be poisoned twice, meaning they would not be poisoned at
651 // all, as heap poisoning uses address negation.
652 // - If `base` were equal to `expected`, poisoning `expected`
653 // would invalidate `base`.
654 DCHECK_NE(value, expected);
655 DCHECK_NE(base, expected);
656
657 __ PoisonHeapReference(expected);
658 __ PoisonHeapReference(value);
659 }
660
661 __ LockCmpxchgl(field_addr_, value);
662
663 // If heap poisoning is enabled, we need to unpoison the values
664 // that were poisoned earlier.
665 if (kPoisonHeapReferences) {
666 if (base_equals_value) {
667 // `value` has been moved to a temporary register, no need
668 // to unpoison it.
669 } else {
670 __ UnpoisonHeapReference(value);
671 }
672 // No need to unpoison `expected` (EAX), as it is overwritten below.
673 }
674
675 // Restore EAX.
676 __ popl(EAX);
677
678 __ Bind(&done);
679 __ jmp(GetExitLabel());
680 }
681
682 private:
683 // The location (register) of the marked object reference.
684 const Location ref_;
685 // The register containing the object holding the marked object reference field.
686 const Register obj_;
687 // The address of the marked reference field. The base of this address must be `obj_`.
688 const Address field_addr_;
689
690 // Should the reference in `ref_` be unpoisoned prior to marking it?
691 const bool unpoison_ref_before_marking_;
692
693 const Register temp_;
694
695 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
696 };
697
698 // Slow path generating a read barrier for a heap reference.
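// The reference `ref`, the holder `obj` and the offset (or index) are moved into the
// runtime calling convention registers, the ReadBarrierSlow entrypoint is invoked, and
// the returned to-space reference is copied from EAX into `out`.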
699 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { 700 public: 701 ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction, 702 Location out, 703 Location ref, 704 Location obj, 705 uint32_t offset, 706 Location index) 707 : SlowPathCode(instruction), 708 out_(out), 709 ref_(ref), 710 obj_(obj), 711 offset_(offset), 712 index_(index) { 713 DCHECK(kEmitCompilerReadBarrier); 714 // If `obj` is equal to `out` or `ref`, it means the initial object 715 // has been overwritten by (or after) the heap object reference load 716 // to be instrumented, e.g.: 717 // 718 // __ movl(out, Address(out, offset)); 719 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); 720 // 721 // In that case, we have lost the information about the original 722 // object, and the emitted read barrier cannot work properly. 723 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; 724 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; 725 } 726 727 void EmitNativeCode(CodeGenerator* codegen) override { 728 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 729 LocationSummary* locations = instruction_->GetLocations(); 730 Register reg_out = out_.AsRegister<Register>(); 731 DCHECK(locations->CanCall()); 732 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); 733 DCHECK(instruction_->IsInstanceFieldGet() || 734 instruction_->IsStaticFieldGet() || 735 instruction_->IsArrayGet() || 736 instruction_->IsInstanceOf() || 737 instruction_->IsCheckCast() || 738 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) 739 << "Unexpected instruction in read barrier for heap reference slow path: " 740 << instruction_->DebugName(); 741 742 __ Bind(GetEntryLabel()); 743 SaveLiveRegisters(codegen, locations); 744 745 // We may have to change the index's value, but as `index_` is a 746 // constant member (like other "inputs" of this slow path), 747 // introduce a copy of it, `index`. 748 Location index = index_; 749 if (index_.IsValid()) { 750 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. 751 if (instruction_->IsArrayGet()) { 752 // Compute the actual memory offset and store it in `index`. 753 Register index_reg = index_.AsRegister<Register>(); 754 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); 755 if (codegen->IsCoreCalleeSaveRegister(index_reg)) { 756 // We are about to change the value of `index_reg` (see the 757 // calls to art::x86::X86Assembler::shll and 758 // art::x86::X86Assembler::AddImmediate below), but it has 759 // not been saved by the previous call to 760 // art::SlowPathCode::SaveLiveRegisters, as it is a 761 // callee-save register -- 762 // art::SlowPathCode::SaveLiveRegisters does not consider 763 // callee-save registers, as it has been designed with the 764 // assumption that callee-save registers are supposed to be 765 // handled by the called function. So, as a callee-save 766 // register, `index_reg` _would_ eventually be saved onto 767 // the stack, but it would be too late: we would have 768 // changed its value earlier. Therefore, we manually save 769 // it here into another freely available register, 770 // `free_reg`, chosen of course among the caller-save 771 // registers (as a callee-save `free_reg` register would 772 // exhibit the same problem). 
773 // 774 // Note we could have requested a temporary register from 775 // the register allocator instead; but we prefer not to, as 776 // this is a slow path, and we know we can find a 777 // caller-save register that is available. 778 Register free_reg = FindAvailableCallerSaveRegister(codegen); 779 __ movl(free_reg, index_reg); 780 index_reg = free_reg; 781 index = Location::RegisterLocation(index_reg); 782 } else { 783 // The initial register stored in `index_` has already been 784 // saved in the call to art::SlowPathCode::SaveLiveRegisters 785 // (as it is not a callee-save register), so we can freely 786 // use it. 787 } 788 // Shifting the index value contained in `index_reg` by the scale 789 // factor (2) cannot overflow in practice, as the runtime is 790 // unable to allocate object arrays with a size larger than 791 // 2^26 - 1 (that is, 2^28 - 4 bytes). 792 __ shll(index_reg, Immediate(TIMES_4)); 793 static_assert( 794 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 795 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 796 __ AddImmediate(index_reg, Immediate(offset_)); 797 } else { 798 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile 799 // intrinsics, `index_` is not shifted by a scale factor of 2 800 // (as in the case of ArrayGet), as it is actually an offset 801 // to an object field within an object. 802 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); 803 DCHECK(instruction_->GetLocations()->Intrinsified()); 804 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || 805 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) 806 << instruction_->AsInvoke()->GetIntrinsic(); 807 DCHECK_EQ(offset_, 0U); 808 DCHECK(index_.IsRegisterPair()); 809 // UnsafeGet's offset location is a register pair, the low 810 // part contains the correct offset. 811 index = index_.ToLow(); 812 } 813 } 814 815 // We're moving two or three locations to locations that could 816 // overlap, so we need a parallel move resolver. 
817 InvokeRuntimeCallingConvention calling_convention;
818 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
819 parallel_move.AddMove(ref_,
820 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
821 DataType::Type::kReference,
822 nullptr);
823 parallel_move.AddMove(obj_,
824 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
825 DataType::Type::kReference,
826 nullptr);
827 if (index.IsValid()) {
828 parallel_move.AddMove(index,
829 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
830 DataType::Type::kInt32,
831 nullptr);
832 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
833 } else {
834 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
835 __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
836 }
837 x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
838 CheckEntrypointTypes<
839 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
840 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
841
842 RestoreLiveRegisters(codegen, locations);
843 __ jmp(GetExitLabel());
844 }
845
846 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
847
848 private:
849 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
850 size_t ref = static_cast<int>(ref_.AsRegister<Register>());
851 size_t obj = static_cast<int>(obj_.AsRegister<Register>());
852 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
853 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
854 return static_cast<Register>(i);
855 }
856 }
857 // We shall never fail to find a free caller-save register, as
858 // there are more than two core caller-save registers on x86
859 // (meaning it is possible to find one which is different from
860 // `ref` and `obj`).
861 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
862 LOG(FATAL) << "Could not find a free caller-save register";
863 UNREACHABLE();
864 }
865
866 const Location out_;
867 const Location ref_;
868 const Location obj_;
869 const uint32_t offset_;
870 // An additional location containing an index to an array.
871 // Only used for HArrayGet and the UnsafeGetObject &
872 // UnsafeGetObjectVolatile intrinsics.
873 const Location index_;
874
875 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
876 };
877
878 // Slow path generating a read barrier for a GC root.
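// The root location is moved into the first runtime calling convention register, the
// ReadBarrierForRootSlow entrypoint is invoked, and the result is copied from EAX into
// `out`.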
879 class ReadBarrierForRootSlowPathX86 : public SlowPathCode { 880 public: 881 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) 882 : SlowPathCode(instruction), out_(out), root_(root) { 883 DCHECK(kEmitCompilerReadBarrier); 884 } 885 886 void EmitNativeCode(CodeGenerator* codegen) override { 887 LocationSummary* locations = instruction_->GetLocations(); 888 Register reg_out = out_.AsRegister<Register>(); 889 DCHECK(locations->CanCall()); 890 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); 891 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) 892 << "Unexpected instruction in read barrier for GC root slow path: " 893 << instruction_->DebugName(); 894 895 __ Bind(GetEntryLabel()); 896 SaveLiveRegisters(codegen, locations); 897 898 InvokeRuntimeCallingConvention calling_convention; 899 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); 900 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); 901 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, 902 instruction_, 903 instruction_->GetDexPc(), 904 this); 905 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); 906 x86_codegen->Move32(out_, Location::RegisterLocation(EAX)); 907 908 RestoreLiveRegisters(codegen, locations); 909 __ jmp(GetExitLabel()); 910 } 911 912 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; } 913 914 private: 915 const Location out_; 916 const Location root_; 917 918 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86); 919 }; 920 921 #undef __ 922 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 923 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT 924 925 inline Condition X86Condition(IfCondition cond) { 926 switch (cond) { 927 case kCondEQ: return kEqual; 928 case kCondNE: return kNotEqual; 929 case kCondLT: return kLess; 930 case kCondLE: return kLessEqual; 931 case kCondGT: return kGreater; 932 case kCondGE: return kGreaterEqual; 933 case kCondB: return kBelow; 934 case kCondBE: return kBelowEqual; 935 case kCondA: return kAbove; 936 case kCondAE: return kAboveEqual; 937 } 938 LOG(FATAL) << "Unreachable"; 939 UNREACHABLE(); 940 } 941 942 // Maps signed condition to unsigned condition and FP condition to x86 name. 943 inline Condition X86UnsignedOrFPCondition(IfCondition cond) { 944 switch (cond) { 945 case kCondEQ: return kEqual; 946 case kCondNE: return kNotEqual; 947 // Signed to unsigned, and FP to x86 name. 948 case kCondLT: return kBelow; 949 case kCondLE: return kBelowEqual; 950 case kCondGT: return kAbove; 951 case kCondGE: return kAboveEqual; 952 // Unsigned remain unchanged. 
953 case kCondB: return kBelow; 954 case kCondBE: return kBelowEqual; 955 case kCondA: return kAbove; 956 case kCondAE: return kAboveEqual; 957 } 958 LOG(FATAL) << "Unreachable"; 959 UNREACHABLE(); 960 } 961 962 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const { 963 stream << Register(reg); 964 } 965 966 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const { 967 stream << XmmRegister(reg); 968 } 969 970 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const { 971 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures(); 972 } 973 974 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { 975 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id)); 976 return kX86WordSize; 977 } 978 979 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { 980 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index)); 981 return kX86WordSize; 982 } 983 984 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 985 if (GetGraph()->HasSIMD()) { 986 __ movups(Address(ESP, stack_index), XmmRegister(reg_id)); 987 } else { 988 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id)); 989 } 990 return GetFloatingPointSpillSlotSize(); 991 } 992 993 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { 994 if (GetGraph()->HasSIMD()) { 995 __ movups(XmmRegister(reg_id), Address(ESP, stack_index)); 996 } else { 997 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index)); 998 } 999 return GetFloatingPointSpillSlotSize(); 1000 } 1001 1002 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint, 1003 HInstruction* instruction, 1004 uint32_t dex_pc, 1005 SlowPathCode* slow_path) { 1006 ValidateInvokeRuntime(entrypoint, instruction, slow_path); 1007 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value()); 1008 if (EntrypointRequiresStackMap(entrypoint)) { 1009 RecordPcInfo(instruction, dex_pc, slow_path); 1010 } 1011 } 1012 1013 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, 1014 HInstruction* instruction, 1015 SlowPathCode* slow_path) { 1016 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); 1017 GenerateInvokeRuntime(entry_point_offset); 1018 } 1019 1020 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { 1021 __ fs()->call(Address::Absolute(entry_point_offset)); 1022 } 1023 1024 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, 1025 const CompilerOptions& compiler_options, 1026 OptimizingCompilerStats* stats) 1027 : CodeGenerator(graph, 1028 kNumberOfCpuRegisters, 1029 kNumberOfXmmRegisters, 1030 kNumberOfRegisterPairs, 1031 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), 1032 arraysize(kCoreCalleeSaves)) 1033 | (1 << kFakeReturnRegister), 1034 0, 1035 compiler_options, 1036 stats), 1037 block_labels_(nullptr), 1038 location_builder_(graph, this), 1039 instruction_visitor_(graph, this), 1040 move_resolver_(graph->GetAllocator(), this), 1041 assembler_(graph->GetAllocator()), 1042 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1043 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1044 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1045 
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1046 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1047 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1048 boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1049 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1050 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1051 constant_area_start_(-1), 1052 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 1053 method_address_offset_(std::less<uint32_t>(), 1054 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { 1055 // Use a fake return address register to mimic Quick. 1056 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); 1057 } 1058 1059 void CodeGeneratorX86::SetupBlockedRegisters() const { 1060 // Stack register is always reserved. 1061 blocked_core_registers_[ESP] = true; 1062 } 1063 1064 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen) 1065 : InstructionCodeGenerator(graph, codegen), 1066 assembler_(codegen->GetAssembler()), 1067 codegen_(codegen) {} 1068 1069 static dwarf::Reg DWARFReg(Register reg) { 1070 return dwarf::Reg::X86Core(static_cast<int>(reg)); 1071 } 1072 1073 void CodeGeneratorX86::GenerateFrameEntry() { 1074 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address 1075 __ Bind(&frame_entry_label_); 1076 bool skip_overflow_check = 1077 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); 1078 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); 1079 1080 if (GetCompilerOptions().CountHotnessInCompiledCode()) { 1081 __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()), 1082 Immediate(1)); 1083 } 1084 1085 if (!skip_overflow_check) { 1086 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86); 1087 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes))); 1088 RecordPcInfo(nullptr, 0); 1089 } 1090 1091 if (HasEmptyFrame()) { 1092 return; 1093 } 1094 1095 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { 1096 Register reg = kCoreCalleeSaves[i]; 1097 if (allocated_registers_.ContainsCoreRegister(reg)) { 1098 __ pushl(reg); 1099 __ cfi().AdjustCFAOffset(kX86WordSize); 1100 __ cfi().RelOffset(DWARFReg(reg), 0); 1101 } 1102 } 1103 1104 int adjust = GetFrameSize() - FrameEntrySpillSize(); 1105 __ subl(ESP, Immediate(adjust)); 1106 __ cfi().AdjustCFAOffset(adjust); 1107 // Save the current method if we need it. Note that we do not 1108 // do this in HCurrentMethod, as the instruction might have been removed 1109 // in the SSA graph. 1110 if (RequiresCurrentMethod()) { 1111 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); 1112 } 1113 1114 if (GetGraph()->HasShouldDeoptimizeFlag()) { 1115 // Initialize should_deoptimize flag to 0. 
1116 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); 1117 } 1118 } 1119 1120 void CodeGeneratorX86::GenerateFrameExit() { 1121 __ cfi().RememberState(); 1122 if (!HasEmptyFrame()) { 1123 int adjust = GetFrameSize() - FrameEntrySpillSize(); 1124 __ addl(ESP, Immediate(adjust)); 1125 __ cfi().AdjustCFAOffset(-adjust); 1126 1127 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { 1128 Register reg = kCoreCalleeSaves[i]; 1129 if (allocated_registers_.ContainsCoreRegister(reg)) { 1130 __ popl(reg); 1131 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize)); 1132 __ cfi().Restore(DWARFReg(reg)); 1133 } 1134 } 1135 } 1136 __ ret(); 1137 __ cfi().RestoreState(); 1138 __ cfi().DefCFAOffset(GetFrameSize()); 1139 } 1140 1141 void CodeGeneratorX86::Bind(HBasicBlock* block) { 1142 __ Bind(GetLabelOf(block)); 1143 } 1144 1145 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const { 1146 switch (type) { 1147 case DataType::Type::kReference: 1148 case DataType::Type::kBool: 1149 case DataType::Type::kUint8: 1150 case DataType::Type::kInt8: 1151 case DataType::Type::kUint16: 1152 case DataType::Type::kInt16: 1153 case DataType::Type::kUint32: 1154 case DataType::Type::kInt32: 1155 return Location::RegisterLocation(EAX); 1156 1157 case DataType::Type::kUint64: 1158 case DataType::Type::kInt64: 1159 return Location::RegisterPairLocation(EAX, EDX); 1160 1161 case DataType::Type::kVoid: 1162 return Location::NoLocation(); 1163 1164 case DataType::Type::kFloat64: 1165 case DataType::Type::kFloat32: 1166 return Location::FpuRegisterLocation(XMM0); 1167 } 1168 1169 UNREACHABLE(); 1170 } 1171 1172 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const { 1173 return Location::RegisterLocation(kMethodRegisterArgument); 1174 } 1175 1176 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) { 1177 switch (type) { 1178 case DataType::Type::kReference: 1179 case DataType::Type::kBool: 1180 case DataType::Type::kUint8: 1181 case DataType::Type::kInt8: 1182 case DataType::Type::kUint16: 1183 case DataType::Type::kInt16: 1184 case DataType::Type::kInt32: { 1185 uint32_t index = gp_index_++; 1186 stack_index_++; 1187 if (index < calling_convention.GetNumberOfRegisters()) { 1188 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 1189 } else { 1190 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 1191 } 1192 } 1193 1194 case DataType::Type::kInt64: { 1195 uint32_t index = gp_index_; 1196 gp_index_ += 2; 1197 stack_index_ += 2; 1198 if (index + 1 < calling_convention.GetNumberOfRegisters()) { 1199 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair( 1200 calling_convention.GetRegisterPairAt(index)); 1201 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); 1202 } else { 1203 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 1204 } 1205 } 1206 1207 case DataType::Type::kFloat32: { 1208 uint32_t index = float_index_++; 1209 stack_index_++; 1210 if (index < calling_convention.GetNumberOfFpuRegisters()) { 1211 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 1212 } else { 1213 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 1214 } 1215 } 1216 1217 case DataType::Type::kFloat64: { 1218 uint32_t index = float_index_++; 1219 stack_index_ += 2; 1220 if (index < 
calling_convention.GetNumberOfFpuRegisters()) { 1221 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 1222 } else { 1223 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 1224 } 1225 } 1226 1227 case DataType::Type::kUint32: 1228 case DataType::Type::kUint64: 1229 case DataType::Type::kVoid: 1230 LOG(FATAL) << "Unexpected parameter type " << type; 1231 UNREACHABLE(); 1232 } 1233 return Location::NoLocation(); 1234 } 1235 1236 void CodeGeneratorX86::Move32(Location destination, Location source) { 1237 if (source.Equals(destination)) { 1238 return; 1239 } 1240 if (destination.IsRegister()) { 1241 if (source.IsRegister()) { 1242 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>()); 1243 } else if (source.IsFpuRegister()) { 1244 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>()); 1245 } else { 1246 DCHECK(source.IsStackSlot()); 1247 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex())); 1248 } 1249 } else if (destination.IsFpuRegister()) { 1250 if (source.IsRegister()) { 1251 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>()); 1252 } else if (source.IsFpuRegister()) { 1253 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); 1254 } else { 1255 DCHECK(source.IsStackSlot()); 1256 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); 1257 } 1258 } else { 1259 DCHECK(destination.IsStackSlot()) << destination; 1260 if (source.IsRegister()) { 1261 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>()); 1262 } else if (source.IsFpuRegister()) { 1263 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); 1264 } else if (source.IsConstant()) { 1265 HConstant* constant = source.GetConstant(); 1266 int32_t value = GetInt32ValueOf(constant); 1267 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); 1268 } else { 1269 DCHECK(source.IsStackSlot()); 1270 __ pushl(Address(ESP, source.GetStackIndex())); 1271 __ popl(Address(ESP, destination.GetStackIndex())); 1272 } 1273 } 1274 } 1275 1276 void CodeGeneratorX86::Move64(Location destination, Location source) { 1277 if (source.Equals(destination)) { 1278 return; 1279 } 1280 if (destination.IsRegisterPair()) { 1281 if (source.IsRegisterPair()) { 1282 EmitParallelMoves( 1283 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()), 1284 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()), 1285 DataType::Type::kInt32, 1286 Location::RegisterLocation(source.AsRegisterPairLow<Register>()), 1287 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), 1288 DataType::Type::kInt32); 1289 } else if (source.IsFpuRegister()) { 1290 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>(); 1291 __ movd(destination.AsRegisterPairLow<Register>(), src_reg); 1292 __ psrlq(src_reg, Immediate(32)); 1293 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg); 1294 } else { 1295 // No conflict possible, so just do the moves. 
1296 DCHECK(source.IsDoubleStackSlot()); 1297 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex())); 1298 __ movl(destination.AsRegisterPairHigh<Register>(), 1299 Address(ESP, source.GetHighStackIndex(kX86WordSize))); 1300 } 1301 } else if (destination.IsFpuRegister()) { 1302 if (source.IsFpuRegister()) { 1303 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); 1304 } else if (source.IsDoubleStackSlot()) { 1305 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); 1306 } else if (source.IsRegisterPair()) { 1307 size_t elem_size = DataType::Size(DataType::Type::kInt32); 1308 // Create stack space for 2 elements. 1309 __ subl(ESP, Immediate(2 * elem_size)); 1310 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>()); 1311 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>()); 1312 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); 1313 // And remove the temporary stack space we allocated. 1314 __ addl(ESP, Immediate(2 * elem_size)); 1315 } else { 1316 LOG(FATAL) << "Unimplemented"; 1317 } 1318 } else { 1319 DCHECK(destination.IsDoubleStackSlot()) << destination; 1320 if (source.IsRegisterPair()) { 1321 // No conflict possible, so just do the moves. 1322 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>()); 1323 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), 1324 source.AsRegisterPairHigh<Register>()); 1325 } else if (source.IsFpuRegister()) { 1326 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); 1327 } else if (source.IsConstant()) { 1328 HConstant* constant = source.GetConstant(); 1329 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant()); 1330 int64_t value = GetInt64ValueOf(constant); 1331 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value))); 1332 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), 1333 Immediate(High32Bits(value))); 1334 } else { 1335 DCHECK(source.IsDoubleStackSlot()) << source; 1336 EmitParallelMoves( 1337 Location::StackSlot(source.GetStackIndex()), 1338 Location::StackSlot(destination.GetStackIndex()), 1339 DataType::Type::kInt32, 1340 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)), 1341 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)), 1342 DataType::Type::kInt32); 1343 } 1344 } 1345 } 1346 1347 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) { 1348 DCHECK(location.IsRegister()); 1349 __ movl(location.AsRegister<Register>(), Immediate(value)); 1350 } 1351 1352 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) { 1353 HParallelMove move(GetGraph()->GetAllocator()); 1354 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) { 1355 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr); 1356 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr); 1357 } else { 1358 move.AddMove(src, dst, dst_type, nullptr); 1359 } 1360 GetMoveResolver()->EmitNativeCode(&move); 1361 } 1362 1363 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) { 1364 if (location.IsRegister()) { 1365 locations->AddTemp(location); 1366 } else if (location.IsRegisterPair()) { 1367 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>())); 1368 
locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>())); 1369 } else { 1370 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; 1371 } 1372 } 1373 1374 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) { 1375 if (successor->IsExitBlock()) { 1376 DCHECK(got->GetPrevious()->AlwaysThrows()); 1377 return; // no code needed 1378 } 1379 1380 HBasicBlock* block = got->GetBlock(); 1381 HInstruction* previous = got->GetPrevious(); 1382 1383 HLoopInformation* info = block->GetLoopInformation(); 1384 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 1385 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { 1386 __ pushl(EAX); 1387 __ movl(EAX, Address(ESP, kX86WordSize)); 1388 __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1)); 1389 __ popl(EAX); 1390 } 1391 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 1392 return; 1393 } 1394 1395 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 1396 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 1397 } 1398 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { 1399 __ jmp(codegen_->GetLabelOf(successor)); 1400 } 1401 } 1402 1403 void LocationsBuilderX86::VisitGoto(HGoto* got) { 1404 got->SetLocations(nullptr); 1405 } 1406 1407 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) { 1408 HandleGoto(got, got->GetSuccessor()); 1409 } 1410 1411 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) { 1412 try_boundary->SetLocations(nullptr); 1413 } 1414 1415 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) { 1416 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); 1417 if (!successor->IsExitBlock()) { 1418 HandleGoto(try_boundary, successor); 1419 } 1420 } 1421 1422 void LocationsBuilderX86::VisitExit(HExit* exit) { 1423 exit->SetLocations(nullptr); 1424 } 1425 1426 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 1427 } 1428 1429 template<class LabelType> 1430 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond, 1431 LabelType* true_label, 1432 LabelType* false_label) { 1433 if (cond->IsFPConditionTrueIfNaN()) { 1434 __ j(kUnordered, true_label); 1435 } else if (cond->IsFPConditionFalseIfNaN()) { 1436 __ j(kUnordered, false_label); 1437 } 1438 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label); 1439 } 1440 1441 template<class LabelType> 1442 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, 1443 LabelType* true_label, 1444 LabelType* false_label) { 1445 LocationSummary* locations = cond->GetLocations(); 1446 Location left = locations->InAt(0); 1447 Location right = locations->InAt(1); 1448 IfCondition if_cond = cond->GetCondition(); 1449 1450 Register left_high = left.AsRegisterPairHigh<Register>(); 1451 Register left_low = left.AsRegisterPairLow<Register>(); 1452 IfCondition true_high_cond = if_cond; 1453 IfCondition false_high_cond = cond->GetOppositeCondition(); 1454 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part 1455 1456 // Set the conditions for the test, remembering that == needs to be 1457 // decided using the low words. 1458 switch (if_cond) { 1459 case kCondEQ: 1460 case kCondNE: 1461 // Nothing to do. 
1462 break; 1463 case kCondLT: 1464 false_high_cond = kCondGT; 1465 break; 1466 case kCondLE: 1467 true_high_cond = kCondLT; 1468 break; 1469 case kCondGT: 1470 false_high_cond = kCondLT; 1471 break; 1472 case kCondGE: 1473 true_high_cond = kCondGT; 1474 break; 1475 case kCondB: 1476 false_high_cond = kCondA; 1477 break; 1478 case kCondBE: 1479 true_high_cond = kCondB; 1480 break; 1481 case kCondA: 1482 false_high_cond = kCondB; 1483 break; 1484 case kCondAE: 1485 true_high_cond = kCondA; 1486 break; 1487 } 1488 1489 if (right.IsConstant()) { 1490 int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); 1491 int32_t val_high = High32Bits(value); 1492 int32_t val_low = Low32Bits(value); 1493 1494 codegen_->Compare32BitValue(left_high, val_high); 1495 if (if_cond == kCondNE) { 1496 __ j(X86Condition(true_high_cond), true_label); 1497 } else if (if_cond == kCondEQ) { 1498 __ j(X86Condition(false_high_cond), false_label); 1499 } else { 1500 __ j(X86Condition(true_high_cond), true_label); 1501 __ j(X86Condition(false_high_cond), false_label); 1502 } 1503 // Must be equal high, so compare the lows. 1504 codegen_->Compare32BitValue(left_low, val_low); 1505 } else if (right.IsRegisterPair()) { 1506 Register right_high = right.AsRegisterPairHigh<Register>(); 1507 Register right_low = right.AsRegisterPairLow<Register>(); 1508 1509 __ cmpl(left_high, right_high); 1510 if (if_cond == kCondNE) { 1511 __ j(X86Condition(true_high_cond), true_label); 1512 } else if (if_cond == kCondEQ) { 1513 __ j(X86Condition(false_high_cond), false_label); 1514 } else { 1515 __ j(X86Condition(true_high_cond), true_label); 1516 __ j(X86Condition(false_high_cond), false_label); 1517 } 1518 // Must be equal high, so compare the lows. 1519 __ cmpl(left_low, right_low); 1520 } else { 1521 DCHECK(right.IsDoubleStackSlot()); 1522 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize))); 1523 if (if_cond == kCondNE) { 1524 __ j(X86Condition(true_high_cond), true_label); 1525 } else if (if_cond == kCondEQ) { 1526 __ j(X86Condition(false_high_cond), false_label); 1527 } else { 1528 __ j(X86Condition(true_high_cond), true_label); 1529 __ j(X86Condition(false_high_cond), false_label); 1530 } 1531 // Must be equal high, so compare the lows. 1532 __ cmpl(left_low, Address(ESP, right.GetStackIndex())); 1533 } 1534 // The last comparison might be unsigned. 
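// (If control reaches this point, the high words compared equal, so the result is
// decided by the low words, which must be compared as unsigned values; that is why
// `final_condition` was derived with X86UnsignedOrFPCondition above.)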
1535 __ j(final_condition, true_label); 1536 } 1537 1538 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs, 1539 Location rhs, 1540 HInstruction* insn, 1541 bool is_double) { 1542 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable(); 1543 if (is_double) { 1544 if (rhs.IsFpuRegister()) { 1545 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); 1546 } else if (const_area != nullptr) { 1547 DCHECK(const_area->IsEmittedAtUseSite()); 1548 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), 1549 codegen_->LiteralDoubleAddress( 1550 const_area->GetConstant()->AsDoubleConstant()->GetValue(), 1551 const_area->GetBaseMethodAddress(), 1552 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 1553 } else { 1554 DCHECK(rhs.IsDoubleStackSlot()); 1555 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex())); 1556 } 1557 } else { 1558 if (rhs.IsFpuRegister()) { 1559 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); 1560 } else if (const_area != nullptr) { 1561 DCHECK(const_area->IsEmittedAtUseSite()); 1562 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), 1563 codegen_->LiteralFloatAddress( 1564 const_area->GetConstant()->AsFloatConstant()->GetValue(), 1565 const_area->GetBaseMethodAddress(), 1566 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 1567 } else { 1568 DCHECK(rhs.IsStackSlot()); 1569 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex())); 1570 } 1571 } 1572 } 1573 1574 template<class LabelType> 1575 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition, 1576 LabelType* true_target_in, 1577 LabelType* false_target_in) { 1578 // Generated branching requires both targets to be explicit. If either of the 1579 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. 1580 LabelType fallthrough_target; 1581 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; 1582 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; 1583 1584 LocationSummary* locations = condition->GetLocations(); 1585 Location left = locations->InAt(0); 1586 Location right = locations->InAt(1); 1587 1588 DataType::Type type = condition->InputAt(0)->GetType(); 1589 switch (type) { 1590 case DataType::Type::kInt64: 1591 GenerateLongComparesAndJumps(condition, true_target, false_target); 1592 break; 1593 case DataType::Type::kFloat32: 1594 GenerateFPCompare(left, right, condition, false); 1595 GenerateFPJumps(condition, true_target, false_target); 1596 break; 1597 case DataType::Type::kFloat64: 1598 GenerateFPCompare(left, right, condition, true); 1599 GenerateFPJumps(condition, true_target, false_target); 1600 break; 1601 default: 1602 LOG(FATAL) << "Unexpected compare type " << type; 1603 } 1604 1605 if (false_target != &fallthrough_target) { 1606 __ jmp(false_target); 1607 } 1608 1609 if (fallthrough_target.IsLinked()) { 1610 __ Bind(&fallthrough_target); 1611 } 1612 } 1613 1614 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { 1615 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS 1616 // are set only strictly before `branch`. We can't use the eflags on long/FP 1617 // conditions if they are materialized due to the complex branching. 
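// For example, an integer HEqual that is immediately followed by the HIf (or HSelect)
// that consumes it can reuse the flags set by the `cmp`/`test` emitted when the
// condition was materialized, so a single `j<cond>`/`cmov` suffices at the use site.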
1618 return cond->IsCondition() && 1619 cond->GetNext() == branch && 1620 cond->InputAt(0)->GetType() != DataType::Type::kInt64 && 1621 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); 1622 } 1623 1624 template<class LabelType> 1625 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction, 1626 size_t condition_input_index, 1627 LabelType* true_target, 1628 LabelType* false_target) { 1629 HInstruction* cond = instruction->InputAt(condition_input_index); 1630 1631 if (true_target == nullptr && false_target == nullptr) { 1632 // Nothing to do. The code always falls through. 1633 return; 1634 } else if (cond->IsIntConstant()) { 1635 // Constant condition, statically compared against "true" (integer value 1). 1636 if (cond->AsIntConstant()->IsTrue()) { 1637 if (true_target != nullptr) { 1638 __ jmp(true_target); 1639 } 1640 } else { 1641 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 1642 if (false_target != nullptr) { 1643 __ jmp(false_target); 1644 } 1645 } 1646 return; 1647 } 1648 1649 // The following code generates these patterns: 1650 // (1) true_target == nullptr && false_target != nullptr 1651 // - opposite condition true => branch to false_target 1652 // (2) true_target != nullptr && false_target == nullptr 1653 // - condition true => branch to true_target 1654 // (3) true_target != nullptr && false_target != nullptr 1655 // - condition true => branch to true_target 1656 // - branch to false_target 1657 if (IsBooleanValueOrMaterializedCondition(cond)) { 1658 if (AreEflagsSetFrom(cond, instruction)) { 1659 if (true_target == nullptr) { 1660 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target); 1661 } else { 1662 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); 1663 } 1664 } else { 1665 // Materialized condition, compare against 0. 1666 Location lhs = instruction->GetLocations()->InAt(condition_input_index); 1667 if (lhs.IsRegister()) { 1668 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); 1669 } else { 1670 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); 1671 } 1672 if (true_target == nullptr) { 1673 __ j(kEqual, false_target); 1674 } else { 1675 __ j(kNotEqual, true_target); 1676 } 1677 } 1678 } else { 1679 // Condition has not been materialized, use its inputs as the comparison and 1680 // its condition as the branch condition. 1681 HCondition* condition = cond->AsCondition(); 1682 1683 // If this is a long or FP comparison that has been folded into 1684 // the HCondition, generate the comparison directly. 1685 DataType::Type type = condition->InputAt(0)->GetType(); 1686 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) { 1687 GenerateCompareTestAndBranch(condition, true_target, false_target); 1688 return; 1689 } 1690 1691 Location lhs = condition->GetLocations()->InAt(0); 1692 Location rhs = condition->GetLocations()->InAt(1); 1693 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition). 1694 codegen_->GenerateIntCompare(lhs, rhs); 1695 if (true_target == nullptr) { 1696 __ j(X86Condition(condition->GetOppositeCondition()), false_target); 1697 } else { 1698 __ j(X86Condition(condition->GetCondition()), true_target); 1699 } 1700 } 1701 1702 // If neither branch falls through (case 3), the conditional branch to `true_target` 1703 // was already emitted (case 2) and we need to emit a jump to `false_target`. 
1704 if (true_target != nullptr && false_target != nullptr) { 1705 __ jmp(false_target); 1706 } 1707 } 1708 1709 void LocationsBuilderX86::VisitIf(HIf* if_instr) { 1710 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); 1711 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 1712 locations->SetInAt(0, Location::Any()); 1713 } 1714 } 1715 1716 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { 1717 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 1718 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 1719 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? 1720 nullptr : codegen_->GetLabelOf(true_successor); 1721 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 1722 nullptr : codegen_->GetLabelOf(false_successor); 1723 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); 1724 } 1725 1726 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { 1727 LocationSummary* locations = new (GetGraph()->GetAllocator()) 1728 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 1729 InvokeRuntimeCallingConvention calling_convention; 1730 RegisterSet caller_saves = RegisterSet::Empty(); 1731 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1732 locations->SetCustomSlowPathCallerSaves(caller_saves); 1733 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 1734 locations->SetInAt(0, Location::Any()); 1735 } 1736 } 1737 1738 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { 1739 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize); 1740 GenerateTestAndBranch<Label>(deoptimize, 1741 /* condition_input_index= */ 0, 1742 slow_path->GetEntryLabel(), 1743 /* false_target= */ nullptr); 1744 } 1745 1746 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1747 LocationSummary* locations = new (GetGraph()->GetAllocator()) 1748 LocationSummary(flag, LocationSummary::kNoCall); 1749 locations->SetOut(Location::RequiresRegister()); 1750 } 1751 1752 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1753 __ movl(flag->GetLocations()->Out().AsRegister<Register>(), 1754 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 1755 } 1756 1757 static bool SelectCanUseCMOV(HSelect* select) { 1758 // There are no conditional move instructions for XMMs. 1759 if (DataType::IsFloatingPointType(select->GetType())) { 1760 return false; 1761 } 1762 1763 // A FP condition doesn't generate the single CC that we need. 1764 // In 32 bit mode, a long condition doesn't generate a single CC either. 1765 HInstruction* condition = select->GetCondition(); 1766 if (condition->IsCondition()) { 1767 DataType::Type compare_type = condition->InputAt(0)->GetType(); 1768 if (compare_type == DataType::Type::kInt64 || 1769 DataType::IsFloatingPointType(compare_type)) { 1770 return false; 1771 } 1772 } 1773 1774 // We can generate a CMOV for this Select. 
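// Note that this includes selects producing an Int64 value: as long as the condition
// itself is a 32-bit integer compare or a boolean, VisitSelect emits one cmovl per
// 32-bit half of the result.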
1775 return true; 1776 } 1777 1778 void LocationsBuilderX86::VisitSelect(HSelect* select) { 1779 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); 1780 if (DataType::IsFloatingPointType(select->GetType())) { 1781 locations->SetInAt(0, Location::RequiresFpuRegister()); 1782 locations->SetInAt(1, Location::Any()); 1783 } else { 1784 locations->SetInAt(0, Location::RequiresRegister()); 1785 if (SelectCanUseCMOV(select)) { 1786 if (select->InputAt(1)->IsConstant()) { 1787 // Cmov can't handle a constant value. 1788 locations->SetInAt(1, Location::RequiresRegister()); 1789 } else { 1790 locations->SetInAt(1, Location::Any()); 1791 } 1792 } else { 1793 locations->SetInAt(1, Location::Any()); 1794 } 1795 } 1796 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 1797 locations->SetInAt(2, Location::RequiresRegister()); 1798 } 1799 locations->SetOut(Location::SameAsFirstInput()); 1800 } 1801 1802 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { 1803 LocationSummary* locations = select->GetLocations(); 1804 DCHECK(locations->InAt(0).Equals(locations->Out())); 1805 if (SelectCanUseCMOV(select)) { 1806 // If both the condition and the source types are integer, we can generate 1807 // a CMOV to implement Select. 1808 1809 HInstruction* select_condition = select->GetCondition(); 1810 Condition cond = kNotEqual; 1811 1812 // Figure out how to test the 'condition'. 1813 if (select_condition->IsCondition()) { 1814 HCondition* condition = select_condition->AsCondition(); 1815 if (!condition->IsEmittedAtUseSite()) { 1816 // This was a previously materialized condition. 1817 // Can we use the existing condition code? 1818 if (AreEflagsSetFrom(condition, select)) { 1819 // Materialization was the previous instruction. Condition codes are right. 1820 cond = X86Condition(condition->GetCondition()); 1821 } else { 1822 // No, we have to recreate the condition code. 1823 Register cond_reg = locations->InAt(2).AsRegister<Register>(); 1824 __ testl(cond_reg, cond_reg); 1825 } 1826 } else { 1827 // We can't handle FP or long here. 1828 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64); 1829 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType())); 1830 LocationSummary* cond_locations = condition->GetLocations(); 1831 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1)); 1832 cond = X86Condition(condition->GetCondition()); 1833 } 1834 } else { 1835 // Must be a Boolean condition, which needs to be compared to 0. 1836 Register cond_reg = locations->InAt(2).AsRegister<Register>(); 1837 __ testl(cond_reg, cond_reg); 1838 } 1839 1840 // If the condition is true, overwrite the output, which already contains false. 1841 Location false_loc = locations->InAt(0); 1842 Location true_loc = locations->InAt(1); 1843 if (select->GetType() == DataType::Type::kInt64) { 1844 // 64 bit conditional move. 1845 Register false_high = false_loc.AsRegisterPairHigh<Register>(); 1846 Register false_low = false_loc.AsRegisterPairLow<Register>(); 1847 if (true_loc.IsRegisterPair()) { 1848 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>()); 1849 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>()); 1850 } else { 1851 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize))); 1852 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex())); 1853 } 1854 } else { 1855 // 32 bit conditional move. 
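// cmovl only writes its destination when `cond` holds, so the false value already
// sitting in the output register is left untouched otherwise.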
1856 Register false_reg = false_loc.AsRegister<Register>(); 1857 if (true_loc.IsRegister()) { 1858 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>()); 1859 } else { 1860 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex())); 1861 } 1862 } 1863 } else { 1864 NearLabel false_target; 1865 GenerateTestAndBranch<NearLabel>( 1866 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target); 1867 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); 1868 __ Bind(&false_target); 1869 } 1870 } 1871 1872 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { 1873 new (GetGraph()->GetAllocator()) LocationSummary(info); 1874 } 1875 1876 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) { 1877 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 1878 } 1879 1880 void CodeGeneratorX86::GenerateNop() { 1881 __ nop(); 1882 } 1883 1884 void LocationsBuilderX86::HandleCondition(HCondition* cond) { 1885 LocationSummary* locations = 1886 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); 1887 // Handle the long/FP comparisons made in instruction simplification. 1888 switch (cond->InputAt(0)->GetType()) { 1889 case DataType::Type::kInt64: { 1890 locations->SetInAt(0, Location::RequiresRegister()); 1891 locations->SetInAt(1, Location::Any()); 1892 if (!cond->IsEmittedAtUseSite()) { 1893 locations->SetOut(Location::RequiresRegister()); 1894 } 1895 break; 1896 } 1897 case DataType::Type::kFloat32: 1898 case DataType::Type::kFloat64: { 1899 locations->SetInAt(0, Location::RequiresFpuRegister()); 1900 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) { 1901 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite()); 1902 } else if (cond->InputAt(1)->IsConstant()) { 1903 locations->SetInAt(1, Location::RequiresFpuRegister()); 1904 } else { 1905 locations->SetInAt(1, Location::Any()); 1906 } 1907 if (!cond->IsEmittedAtUseSite()) { 1908 locations->SetOut(Location::RequiresRegister()); 1909 } 1910 break; 1911 } 1912 default: 1913 locations->SetInAt(0, Location::RequiresRegister()); 1914 locations->SetInAt(1, Location::Any()); 1915 if (!cond->IsEmittedAtUseSite()) { 1916 // We need a byte register. 1917 locations->SetOut(Location::RegisterLocation(ECX)); 1918 } 1919 break; 1920 } 1921 } 1922 1923 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { 1924 if (cond->IsEmittedAtUseSite()) { 1925 return; 1926 } 1927 1928 LocationSummary* locations = cond->GetLocations(); 1929 Location lhs = locations->InAt(0); 1930 Location rhs = locations->InAt(1); 1931 Register reg = locations->Out().AsRegister<Register>(); 1932 NearLabel true_label, false_label; 1933 1934 switch (cond->InputAt(0)->GetType()) { 1935 default: { 1936 // Integer case. 1937 1938 // Clear output register: setb only sets the low byte. 1939 __ xorl(reg, reg); 1940 codegen_->GenerateIntCompare(lhs, rhs); 1941 __ setb(X86Condition(cond->GetCondition()), reg); 1942 return; 1943 } 1944 case DataType::Type::kInt64: 1945 GenerateLongComparesAndJumps(cond, &true_label, &false_label); 1946 break; 1947 case DataType::Type::kFloat32: 1948 GenerateFPCompare(lhs, rhs, cond, false); 1949 GenerateFPJumps(cond, &true_label, &false_label); 1950 break; 1951 case DataType::Type::kFloat64: 1952 GenerateFPCompare(lhs, rhs, cond, true); 1953 GenerateFPJumps(cond, &true_label, &false_label); 1954 break; 1955 } 1956 1957 // Convert the jumps into the result. 
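// The jump generators above branch to `true_label` or `false_label`; bind both labels
// here and materialize the outcome as 0 or 1 in the output register.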
1958 NearLabel done_label; 1959 1960 // False case: result = 0. 1961 __ Bind(&false_label); 1962 __ xorl(reg, reg); 1963 __ jmp(&done_label); 1964 1965 // True case: result = 1. 1966 __ Bind(&true_label); 1967 __ movl(reg, Immediate(1)); 1968 __ Bind(&done_label); 1969 } 1970 1971 void LocationsBuilderX86::VisitEqual(HEqual* comp) { 1972 HandleCondition(comp); 1973 } 1974 1975 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) { 1976 HandleCondition(comp); 1977 } 1978 1979 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) { 1980 HandleCondition(comp); 1981 } 1982 1983 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) { 1984 HandleCondition(comp); 1985 } 1986 1987 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) { 1988 HandleCondition(comp); 1989 } 1990 1991 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) { 1992 HandleCondition(comp); 1993 } 1994 1995 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 1996 HandleCondition(comp); 1997 } 1998 1999 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 2000 HandleCondition(comp); 2001 } 2002 2003 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) { 2004 HandleCondition(comp); 2005 } 2006 2007 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) { 2008 HandleCondition(comp); 2009 } 2010 2011 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 2012 HandleCondition(comp); 2013 } 2014 2015 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 2016 HandleCondition(comp); 2017 } 2018 2019 void LocationsBuilderX86::VisitBelow(HBelow* comp) { 2020 HandleCondition(comp); 2021 } 2022 2023 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) { 2024 HandleCondition(comp); 2025 } 2026 2027 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) { 2028 HandleCondition(comp); 2029 } 2030 2031 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) { 2032 HandleCondition(comp); 2033 } 2034 2035 void LocationsBuilderX86::VisitAbove(HAbove* comp) { 2036 HandleCondition(comp); 2037 } 2038 2039 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) { 2040 HandleCondition(comp); 2041 } 2042 2043 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) { 2044 HandleCondition(comp); 2045 } 2046 2047 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) { 2048 HandleCondition(comp); 2049 } 2050 2051 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { 2052 LocationSummary* locations = 2053 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2054 locations->SetOut(Location::ConstantLocation(constant)); 2055 } 2056 2057 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 2058 // Will be generated at use site. 2059 } 2060 2061 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) { 2062 LocationSummary* locations = 2063 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2064 locations->SetOut(Location::ConstantLocation(constant)); 2065 } 2066 2067 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 2068 // Will be generated at use site. 
2069 } 2070 2071 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { 2072 LocationSummary* locations = 2073 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2074 locations->SetOut(Location::ConstantLocation(constant)); 2075 } 2076 2077 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 2078 // Will be generated at use site. 2079 } 2080 2081 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) { 2082 LocationSummary* locations = 2083 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2084 locations->SetOut(Location::ConstantLocation(constant)); 2085 } 2086 2087 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 2088 // Will be generated at use site. 2089 } 2090 2091 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) { 2092 LocationSummary* locations = 2093 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2094 locations->SetOut(Location::ConstantLocation(constant)); 2095 } 2096 2097 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) { 2098 // Will be generated at use site. 2099 } 2100 2101 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) { 2102 constructor_fence->SetLocations(nullptr); 2103 } 2104 2105 void InstructionCodeGeneratorX86::VisitConstructorFence( 2106 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 2107 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 2108 } 2109 2110 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2111 memory_barrier->SetLocations(nullptr); 2112 } 2113 2114 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2115 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 2116 } 2117 2118 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { 2119 ret->SetLocations(nullptr); 2120 } 2121 2122 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { 2123 codegen_->GenerateFrameExit(); 2124 } 2125 2126 void LocationsBuilderX86::VisitReturn(HReturn* ret) { 2127 LocationSummary* locations = 2128 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); 2129 switch (ret->InputAt(0)->GetType()) { 2130 case DataType::Type::kReference: 2131 case DataType::Type::kBool: 2132 case DataType::Type::kUint8: 2133 case DataType::Type::kInt8: 2134 case DataType::Type::kUint16: 2135 case DataType::Type::kInt16: 2136 case DataType::Type::kInt32: 2137 locations->SetInAt(0, Location::RegisterLocation(EAX)); 2138 break; 2139 2140 case DataType::Type::kInt64: 2141 locations->SetInAt( 2142 0, Location::RegisterPairLocation(EAX, EDX)); 2143 break; 2144 2145 case DataType::Type::kFloat32: 2146 case DataType::Type::kFloat64: 2147 locations->SetInAt( 2148 0, Location::FpuRegisterLocation(XMM0)); 2149 break; 2150 2151 default: 2152 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); 2153 } 2154 } 2155 2156 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { 2157 if (kIsDebugBuild) { 2158 switch (ret->InputAt(0)->GetType()) { 2159 case DataType::Type::kReference: 2160 case DataType::Type::kBool: 2161 case DataType::Type::kUint8: 2162 case DataType::Type::kInt8: 2163 case DataType::Type::kUint16: 2164 case DataType::Type::kInt16: 2165 case DataType::Type::kInt32: 2166 
DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX); 2167 break; 2168 2169 case DataType::Type::kInt64: 2170 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX); 2171 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX); 2172 break; 2173 2174 case DataType::Type::kFloat32: 2175 case DataType::Type::kFloat64: 2176 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0); 2177 break; 2178 2179 default: 2180 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); 2181 } 2182 } 2183 codegen_->GenerateFrameExit(); 2184 } 2185 2186 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2187 // The trampoline uses the same calling convention as dex calling conventions, 2188 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 2189 // the method_idx. 2190 HandleInvoke(invoke); 2191 } 2192 2193 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2194 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 2195 } 2196 2197 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2198 // Explicit clinit checks triggered by static invokes must have been pruned by 2199 // art::PrepareForRegisterAllocation. 2200 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 2201 2202 IntrinsicLocationsBuilderX86 intrinsic(codegen_); 2203 if (intrinsic.TryDispatch(invoke)) { 2204 if (invoke->GetLocations()->CanCall() && 2205 invoke->HasPcRelativeMethodLoadKind() && 2206 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) { 2207 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); 2208 } 2209 return; 2210 } 2211 2212 HandleInvoke(invoke); 2213 2214 // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. 2215 if (invoke->HasPcRelativeMethodLoadKind()) { 2216 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); 2217 } 2218 } 2219 2220 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) { 2221 if (invoke->GetLocations()->Intrinsified()) { 2222 IntrinsicCodeGeneratorX86 intrinsic(codegen); 2223 intrinsic.Dispatch(invoke); 2224 return true; 2225 } 2226 return false; 2227 } 2228 2229 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2230 // Explicit clinit checks triggered by static invokes must have been pruned by 2231 // art::PrepareForRegisterAllocation. 2232 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 2233 2234 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 2235 return; 2236 } 2237 2238 LocationSummary* locations = invoke->GetLocations(); 2239 codegen_->GenerateStaticOrDirectCall( 2240 invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); 2241 } 2242 2243 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { 2244 IntrinsicLocationsBuilderX86 intrinsic(codegen_); 2245 if (intrinsic.TryDispatch(invoke)) { 2246 return; 2247 } 2248 2249 HandleInvoke(invoke); 2250 } 2251 2252 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) { 2253 InvokeDexCallingConventionVisitorX86 calling_convention_visitor; 2254 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 2255 } 2256 2257 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { 2258 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 2259 return; 2260 } 2261 2262 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 2263 DCHECK(!codegen_->IsLeafMethod()); 2264 } 2265 2266 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { 2267 // This call to HandleInvoke allocates a temporary (core) register 2268 // which is also used to transfer the hidden argument from FP to 2269 // core register. 2270 HandleInvoke(invoke); 2271 // Add the hidden argument. 2272 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7)); 2273 } 2274 2275 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) { 2276 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 2277 LocationSummary* locations = invoke->GetLocations(); 2278 Register temp = locations->GetTemp(0).AsRegister<Register>(); 2279 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 2280 Location receiver = locations->InAt(0); 2281 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2282 2283 // Set the hidden argument. It is safe to do this here, as XMM7 2284 // won't be modified thereafter, before the `call` instruction. 2285 DCHECK_EQ(XMM7, hidden_reg); 2286 __ movl(temp, Immediate(invoke->GetDexMethodIndex())); 2287 __ movd(hidden_reg, temp); 2288 2289 if (receiver.IsStackSlot()) { 2290 __ movl(temp, Address(ESP, receiver.GetStackIndex())); 2291 // /* HeapReference<Class> */ temp = temp->klass_ 2292 __ movl(temp, Address(temp, class_offset)); 2293 } else { 2294 // /* HeapReference<Class> */ temp = receiver->klass_ 2295 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); 2296 } 2297 codegen_->MaybeRecordImplicitNullCheck(invoke); 2298 // Instead of simply (possibly) unpoisoning `temp` here, we should 2299 // emit a read barrier for the previous class reference load. 2300 // However this is not required in practice, as this is an 2301 // intermediate/temporary reference and because the current 2302 // concurrent copying collector keeps the from-space memory 2303 // intact/accessible until the end of the marking phase (the 2304 // concurrent copying collector may not keep it accessible in the future).
2305 __ MaybeUnpoisonHeapReference(temp); 2306 // temp = temp->GetAddressOfIMT() 2307 __ movl(temp, 2308 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value())); 2309 // temp = temp->GetImtEntryAt(method_offset); 2310 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 2311 invoke->GetImtIndex(), kX86PointerSize)); 2312 __ movl(temp, Address(temp, method_offset)); 2313 // call temp->GetEntryPoint(); 2314 __ call(Address(temp, 2315 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value())); 2316 2317 DCHECK(!codegen_->IsLeafMethod()); 2318 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 2319 } 2320 2321 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 2322 HandleInvoke(invoke); 2323 } 2324 2325 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 2326 codegen_->GenerateInvokePolymorphicCall(invoke); 2327 } 2328 2329 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) { 2330 HandleInvoke(invoke); 2331 } 2332 2333 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) { 2334 codegen_->GenerateInvokeCustomCall(invoke); 2335 } 2336 2337 void LocationsBuilderX86::VisitNeg(HNeg* neg) { 2338 LocationSummary* locations = 2339 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); 2340 switch (neg->GetResultType()) { 2341 case DataType::Type::kInt32: 2342 case DataType::Type::kInt64: 2343 locations->SetInAt(0, Location::RequiresRegister()); 2344 locations->SetOut(Location::SameAsFirstInput()); 2345 break; 2346 2347 case DataType::Type::kFloat32: 2348 locations->SetInAt(0, Location::RequiresFpuRegister()); 2349 locations->SetOut(Location::SameAsFirstInput()); 2350 locations->AddTemp(Location::RequiresRegister()); 2351 locations->AddTemp(Location::RequiresFpuRegister()); 2352 break; 2353 2354 case DataType::Type::kFloat64: 2355 locations->SetInAt(0, Location::RequiresFpuRegister()); 2356 locations->SetOut(Location::SameAsFirstInput()); 2357 locations->AddTemp(Location::RequiresFpuRegister()); 2358 break; 2359 2360 default: 2361 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 2362 } 2363 } 2364 2365 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { 2366 LocationSummary* locations = neg->GetLocations(); 2367 Location out = locations->Out(); 2368 Location in = locations->InAt(0); 2369 switch (neg->GetResultType()) { 2370 case DataType::Type::kInt32: 2371 DCHECK(in.IsRegister()); 2372 DCHECK(in.Equals(out)); 2373 __ negl(out.AsRegister<Register>()); 2374 break; 2375 2376 case DataType::Type::kInt64: 2377 DCHECK(in.IsRegisterPair()); 2378 DCHECK(in.Equals(out)); 2379 __ negl(out.AsRegisterPairLow<Register>()); 2380 // Negation is similar to subtraction from zero. The least 2381 // significant byte triggers a borrow when it is different from 2382 // zero; to take it into account, add 1 to the most significant 2383 // byte if the carry flag (CF) is set to 1 after the first NEGL 2384 // operation. 
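// In other words: the first negl sets CF when the low 32-bit half was non-zero, and
// adcl propagates that borrow into the high half before it is negated.
// Worked example, negating 0x0000000000000001:
//   negl(low)     -> low  = 0xFFFFFFFF, CF = 1
//   adcl(high, 0) -> high = 0x00000001
//   negl(high)    -> high = 0xFFFFFFFF
// which yields 0xFFFFFFFFFFFFFFFF, i.e. -1. When the low half is zero, CF stays 0
// and the high half is simply negated.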
2385 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0)); 2386 __ negl(out.AsRegisterPairHigh<Register>()); 2387 break; 2388 2389 case DataType::Type::kFloat32: { 2390 DCHECK(in.Equals(out)); 2391 Register constant = locations->GetTemp(0).AsRegister<Register>(); 2392 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 2393 // Implement float negation with an exclusive or with value 2394 // 0x80000000 (mask for bit 31, representing the sign of a 2395 // single-precision floating-point number). 2396 __ movl(constant, Immediate(INT32_C(0x80000000))); 2397 __ movd(mask, constant); 2398 __ xorps(out.AsFpuRegister<XmmRegister>(), mask); 2399 break; 2400 } 2401 2402 case DataType::Type::kFloat64: { 2403 DCHECK(in.Equals(out)); 2404 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2405 // Implement double negation with an exclusive or with value 2406 // 0x8000000000000000 (mask for bit 63, representing the sign of 2407 // a double-precision floating-point number). 2408 __ LoadLongConstant(mask, INT64_C(0x8000000000000000)); 2409 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); 2410 break; 2411 } 2412 2413 default: 2414 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 2415 } 2416 } 2417 2418 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) { 2419 LocationSummary* locations = 2420 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); 2421 DCHECK(DataType::IsFloatingPointType(neg->GetType())); 2422 locations->SetInAt(0, Location::RequiresFpuRegister()); 2423 locations->SetInAt(1, Location::RequiresRegister()); 2424 locations->SetOut(Location::SameAsFirstInput()); 2425 locations->AddTemp(Location::RequiresFpuRegister()); 2426 } 2427 2428 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) { 2429 LocationSummary* locations = neg->GetLocations(); 2430 Location out = locations->Out(); 2431 DCHECK(locations->InAt(0).Equals(out)); 2432 2433 Register constant_area = locations->InAt(1).AsRegister<Register>(); 2434 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2435 if (neg->GetType() == DataType::Type::kFloat32) { 2436 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), 2437 neg->GetBaseMethodAddress(), 2438 constant_area)); 2439 __ xorps(out.AsFpuRegister<XmmRegister>(), mask); 2440 } else { 2441 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), 2442 neg->GetBaseMethodAddress(), 2443 constant_area)); 2444 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); 2445 } 2446 } 2447 2448 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { 2449 DataType::Type result_type = conversion->GetResultType(); 2450 DataType::Type input_type = conversion->GetInputType(); 2451 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 2452 << input_type << " -> " << result_type; 2453 2454 // The float-to-long and double-to-long type conversions rely on a 2455 // call to the runtime. 2456 LocationSummary::CallKind call_kind = 2457 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64) 2458 && result_type == DataType::Type::kInt64) 2459 ? 
LocationSummary::kCallOnMainOnly 2460 : LocationSummary::kNoCall; 2461 LocationSummary* locations = 2462 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind); 2463 2464 switch (result_type) { 2465 case DataType::Type::kUint8: 2466 case DataType::Type::kInt8: 2467 switch (input_type) { 2468 case DataType::Type::kUint8: 2469 case DataType::Type::kInt8: 2470 case DataType::Type::kUint16: 2471 case DataType::Type::kInt16: 2472 case DataType::Type::kInt32: 2473 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0))); 2474 // Make the output overlap to please the register allocator. This greatly simplifies 2475 // the validation of the linear scan implementation 2476 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 2477 break; 2478 case DataType::Type::kInt64: { 2479 HInstruction* input = conversion->InputAt(0); 2480 Location input_location = input->IsConstant() 2481 ? Location::ConstantLocation(input->AsConstant()) 2482 : Location::RegisterPairLocation(EAX, EDX); 2483 locations->SetInAt(0, input_location); 2484 // Make the output overlap to please the register allocator. This greatly simplifies 2485 // the validation of the linear scan implementation 2486 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 2487 break; 2488 } 2489 2490 default: 2491 LOG(FATAL) << "Unexpected type conversion from " << input_type 2492 << " to " << result_type; 2493 } 2494 break; 2495 2496 case DataType::Type::kUint16: 2497 case DataType::Type::kInt16: 2498 DCHECK(DataType::IsIntegralType(input_type)) << input_type; 2499 locations->SetInAt(0, Location::Any()); 2500 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2501 break; 2502 2503 case DataType::Type::kInt32: 2504 switch (input_type) { 2505 case DataType::Type::kInt64: 2506 locations->SetInAt(0, Location::Any()); 2507 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2508 break; 2509 2510 case DataType::Type::kFloat32: 2511 locations->SetInAt(0, Location::RequiresFpuRegister()); 2512 locations->SetOut(Location::RequiresRegister()); 2513 locations->AddTemp(Location::RequiresFpuRegister()); 2514 break; 2515 2516 case DataType::Type::kFloat64: 2517 locations->SetInAt(0, Location::RequiresFpuRegister()); 2518 locations->SetOut(Location::RequiresRegister()); 2519 locations->AddTemp(Location::RequiresFpuRegister()); 2520 break; 2521 2522 default: 2523 LOG(FATAL) << "Unexpected type conversion from " << input_type 2524 << " to " << result_type; 2525 } 2526 break; 2527 2528 case DataType::Type::kInt64: 2529 switch (input_type) { 2530 case DataType::Type::kBool: 2531 case DataType::Type::kUint8: 2532 case DataType::Type::kInt8: 2533 case DataType::Type::kUint16: 2534 case DataType::Type::kInt16: 2535 case DataType::Type::kInt32: 2536 locations->SetInAt(0, Location::RegisterLocation(EAX)); 2537 locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); 2538 break; 2539 2540 case DataType::Type::kFloat32: 2541 case DataType::Type::kFloat64: { 2542 InvokeRuntimeCallingConvention calling_convention; 2543 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0); 2544 locations->SetInAt(0, Location::FpuRegisterLocation(parameter)); 2545 2546 // The runtime helper puts the result in EAX, EDX. 
2547 locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); 2548 } 2549 break; 2550 2551 default: 2552 LOG(FATAL) << "Unexpected type conversion from " << input_type 2553 << " to " << result_type; 2554 } 2555 break; 2556 2557 case DataType::Type::kFloat32: 2558 switch (input_type) { 2559 case DataType::Type::kBool: 2560 case DataType::Type::kUint8: 2561 case DataType::Type::kInt8: 2562 case DataType::Type::kUint16: 2563 case DataType::Type::kInt16: 2564 case DataType::Type::kInt32: 2565 locations->SetInAt(0, Location::RequiresRegister()); 2566 locations->SetOut(Location::RequiresFpuRegister()); 2567 break; 2568 2569 case DataType::Type::kInt64: 2570 locations->SetInAt(0, Location::Any()); 2571 locations->SetOut(Location::Any()); 2572 break; 2573 2574 case DataType::Type::kFloat64: 2575 locations->SetInAt(0, Location::RequiresFpuRegister()); 2576 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2577 break; 2578 2579 default: 2580 LOG(FATAL) << "Unexpected type conversion from " << input_type 2581 << " to " << result_type; 2582 } 2583 break; 2584 2585 case DataType::Type::kFloat64: 2586 switch (input_type) { 2587 case DataType::Type::kBool: 2588 case DataType::Type::kUint8: 2589 case DataType::Type::kInt8: 2590 case DataType::Type::kUint16: 2591 case DataType::Type::kInt16: 2592 case DataType::Type::kInt32: 2593 locations->SetInAt(0, Location::RequiresRegister()); 2594 locations->SetOut(Location::RequiresFpuRegister()); 2595 break; 2596 2597 case DataType::Type::kInt64: 2598 locations->SetInAt(0, Location::Any()); 2599 locations->SetOut(Location::Any()); 2600 break; 2601 2602 case DataType::Type::kFloat32: 2603 locations->SetInAt(0, Location::RequiresFpuRegister()); 2604 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2605 break; 2606 2607 default: 2608 LOG(FATAL) << "Unexpected type conversion from " << input_type 2609 << " to " << result_type; 2610 } 2611 break; 2612 2613 default: 2614 LOG(FATAL) << "Unexpected type conversion from " << input_type 2615 << " to " << result_type; 2616 } 2617 } 2618 2619 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) { 2620 LocationSummary* locations = conversion->GetLocations(); 2621 Location out = locations->Out(); 2622 Location in = locations->InAt(0); 2623 DataType::Type result_type = conversion->GetResultType(); 2624 DataType::Type input_type = conversion->GetInputType(); 2625 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 2626 << input_type << " -> " << result_type; 2627 switch (result_type) { 2628 case DataType::Type::kUint8: 2629 switch (input_type) { 2630 case DataType::Type::kInt8: 2631 case DataType::Type::kUint16: 2632 case DataType::Type::kInt16: 2633 case DataType::Type::kInt32: 2634 if (in.IsRegister()) { 2635 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>()); 2636 } else { 2637 DCHECK(in.GetConstant()->IsIntConstant()); 2638 int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); 2639 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value))); 2640 } 2641 break; 2642 case DataType::Type::kInt64: 2643 if (in.IsRegisterPair()) { 2644 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>()); 2645 } else { 2646 DCHECK(in.GetConstant()->IsLongConstant()); 2647 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2648 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value))); 2649 } 2650 break; 2651 2652 default: 2653 LOG(FATAL) << 
"Unexpected type conversion from " << input_type 2654 << " to " << result_type; 2655 } 2656 break; 2657 2658 case DataType::Type::kInt8: 2659 switch (input_type) { 2660 case DataType::Type::kUint8: 2661 case DataType::Type::kUint16: 2662 case DataType::Type::kInt16: 2663 case DataType::Type::kInt32: 2664 if (in.IsRegister()) { 2665 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>()); 2666 } else { 2667 DCHECK(in.GetConstant()->IsIntConstant()); 2668 int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); 2669 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value))); 2670 } 2671 break; 2672 case DataType::Type::kInt64: 2673 if (in.IsRegisterPair()) { 2674 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>()); 2675 } else { 2676 DCHECK(in.GetConstant()->IsLongConstant()); 2677 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2678 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value))); 2679 } 2680 break; 2681 2682 default: 2683 LOG(FATAL) << "Unexpected type conversion from " << input_type 2684 << " to " << result_type; 2685 } 2686 break; 2687 2688 case DataType::Type::kUint16: 2689 switch (input_type) { 2690 case DataType::Type::kInt8: 2691 case DataType::Type::kInt16: 2692 case DataType::Type::kInt32: 2693 if (in.IsRegister()) { 2694 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>()); 2695 } else if (in.IsStackSlot()) { 2696 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); 2697 } else { 2698 DCHECK(in.GetConstant()->IsIntConstant()); 2699 int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); 2700 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value))); 2701 } 2702 break; 2703 case DataType::Type::kInt64: 2704 if (in.IsRegisterPair()) { 2705 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); 2706 } else if (in.IsDoubleStackSlot()) { 2707 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); 2708 } else { 2709 DCHECK(in.GetConstant()->IsLongConstant()); 2710 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2711 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value))); 2712 } 2713 break; 2714 2715 default: 2716 LOG(FATAL) << "Unexpected type conversion from " << input_type 2717 << " to " << result_type; 2718 } 2719 break; 2720 2721 case DataType::Type::kInt16: 2722 switch (input_type) { 2723 case DataType::Type::kUint16: 2724 case DataType::Type::kInt32: 2725 if (in.IsRegister()) { 2726 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>()); 2727 } else if (in.IsStackSlot()) { 2728 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); 2729 } else { 2730 DCHECK(in.GetConstant()->IsIntConstant()); 2731 int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); 2732 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value))); 2733 } 2734 break; 2735 case DataType::Type::kInt64: 2736 if (in.IsRegisterPair()) { 2737 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); 2738 } else if (in.IsDoubleStackSlot()) { 2739 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); 2740 } else { 2741 DCHECK(in.GetConstant()->IsLongConstant()); 2742 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2743 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value))); 2744 } 2745 break; 2746 2747 default: 2748 LOG(FATAL) << "Unexpected type conversion from " << 
input_type 2749 << " to " << result_type; 2750 } 2751 break; 2752 2753 case DataType::Type::kInt32: 2754 switch (input_type) { 2755 case DataType::Type::kInt64: 2756 if (in.IsRegisterPair()) { 2757 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); 2758 } else if (in.IsDoubleStackSlot()) { 2759 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); 2760 } else { 2761 DCHECK(in.IsConstant()); 2762 DCHECK(in.GetConstant()->IsLongConstant()); 2763 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2764 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value))); 2765 } 2766 break; 2767 2768 case DataType::Type::kFloat32: { 2769 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2770 Register output = out.AsRegister<Register>(); 2771 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2772 NearLabel done, nan; 2773 2774 __ movl(output, Immediate(kPrimIntMax)); 2775 // temp = int-to-float(output) 2776 __ cvtsi2ss(temp, output); 2777 // if input >= temp goto done 2778 __ comiss(input, temp); 2779 __ j(kAboveEqual, &done); 2780 // if input == NaN goto nan 2781 __ j(kUnordered, &nan); 2782 // output = float-to-int-truncate(input) 2783 __ cvttss2si(output, input); 2784 __ jmp(&done); 2785 __ Bind(&nan); 2786 // output = 0 2787 __ xorl(output, output); 2788 __ Bind(&done); 2789 break; 2790 } 2791 2792 case DataType::Type::kFloat64: { 2793 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2794 Register output = out.AsRegister<Register>(); 2795 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2796 NearLabel done, nan; 2797 2798 __ movl(output, Immediate(kPrimIntMax)); 2799 // temp = int-to-double(output) 2800 __ cvtsi2sd(temp, output); 2801 // if input >= temp goto done 2802 __ comisd(input, temp); 2803 __ j(kAboveEqual, &done); 2804 // if input == NaN goto nan 2805 __ j(kUnordered, &nan); 2806 // output = double-to-int-truncate(input) 2807 __ cvttsd2si(output, input); 2808 __ jmp(&done); 2809 __ Bind(&nan); 2810 // output = 0 2811 __ xorl(output, output); 2812 __ Bind(&done); 2813 break; 2814 } 2815 2816 default: 2817 LOG(FATAL) << "Unexpected type conversion from " << input_type 2818 << " to " << result_type; 2819 } 2820 break; 2821 2822 case DataType::Type::kInt64: 2823 switch (input_type) { 2824 case DataType::Type::kBool: 2825 case DataType::Type::kUint8: 2826 case DataType::Type::kInt8: 2827 case DataType::Type::kUint16: 2828 case DataType::Type::kInt16: 2829 case DataType::Type::kInt32: 2830 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX); 2831 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX); 2832 DCHECK_EQ(in.AsRegister<Register>(), EAX); 2833 __ cdq(); 2834 break; 2835 2836 case DataType::Type::kFloat32: 2837 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc()); 2838 CheckEntrypointTypes<kQuickF2l, int64_t, float>(); 2839 break; 2840 2841 case DataType::Type::kFloat64: 2842 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc()); 2843 CheckEntrypointTypes<kQuickD2l, int64_t, double>(); 2844 break; 2845 2846 default: 2847 LOG(FATAL) << "Unexpected type conversion from " << input_type 2848 << " to " << result_type; 2849 } 2850 break; 2851 2852 case DataType::Type::kFloat32: 2853 switch (input_type) { 2854 case DataType::Type::kBool: 2855 case DataType::Type::kUint8: 2856 case DataType::Type::kInt8: 2857 case DataType::Type::kUint16: 2858 case DataType::Type::kInt16: 2859 case DataType::Type::kInt32: 2860 __ 
cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>()); 2861 break; 2862 2863 case DataType::Type::kInt64: { 2864 size_t adjustment = 0; 2865 2866 // Create stack space for the call to 2867 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below. 2868 // TODO: enhance register allocator to ask for stack temporaries. 2869 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) { 2870 adjustment = DataType::Size(DataType::Type::kInt64); 2871 __ subl(ESP, Immediate(adjustment)); 2872 } 2873 2874 // Load the value to the FP stack, using temporaries if needed. 2875 PushOntoFPStack(in, 0, adjustment, false, true); 2876 2877 if (out.IsStackSlot()) { 2878 __ fstps(Address(ESP, out.GetStackIndex() + adjustment)); 2879 } else { 2880 __ fstps(Address(ESP, 0)); 2881 Location stack_temp = Location::StackSlot(0); 2882 codegen_->Move32(out, stack_temp); 2883 } 2884 2885 // Remove the temporary stack space we allocated. 2886 if (adjustment != 0) { 2887 __ addl(ESP, Immediate(adjustment)); 2888 } 2889 break; 2890 } 2891 2892 case DataType::Type::kFloat64: 2893 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); 2894 break; 2895 2896 default: 2897 LOG(FATAL) << "Unexpected type conversion from " << input_type 2898 << " to " << result_type; 2899 } 2900 break; 2901 2902 case DataType::Type::kFloat64: 2903 switch (input_type) { 2904 case DataType::Type::kBool: 2905 case DataType::Type::kUint8: 2906 case DataType::Type::kInt8: 2907 case DataType::Type::kUint16: 2908 case DataType::Type::kInt16: 2909 case DataType::Type::kInt32: 2910 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>()); 2911 break; 2912 2913 case DataType::Type::kInt64: { 2914 size_t adjustment = 0; 2915 2916 // Create stack space for the call to 2917 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below. 2918 // TODO: enhance register allocator to ask for stack temporaries. 2919 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) { 2920 adjustment = DataType::Size(DataType::Type::kInt64); 2921 __ subl(ESP, Immediate(adjustment)); 2922 } 2923 2924 // Load the value to the FP stack, using temporaries if needed. 2925 PushOntoFPStack(in, 0, adjustment, false, true); 2926 2927 if (out.IsDoubleStackSlot()) { 2928 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment)); 2929 } else { 2930 __ fstpl(Address(ESP, 0)); 2931 Location stack_temp = Location::DoubleStackSlot(0); 2932 codegen_->Move64(out, stack_temp); 2933 } 2934 2935 // Remove the temporary stack space we allocated. 
2936 if (adjustment != 0) { 2937 __ addl(ESP, Immediate(adjustment)); 2938 } 2939 break; 2940 } 2941 2942 case DataType::Type::kFloat32: 2943 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); 2944 break; 2945 2946 default: 2947 LOG(FATAL) << "Unexpected type conversion from " << input_type 2948 << " to " << result_type; 2949 } 2950 break; 2951 2952 default: 2953 LOG(FATAL) << "Unexpected type conversion from " << input_type 2954 << " to " << result_type; 2955 } 2956 } 2957 2958 void LocationsBuilderX86::VisitAdd(HAdd* add) { 2959 LocationSummary* locations = 2960 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall); 2961 switch (add->GetResultType()) { 2962 case DataType::Type::kInt32: { 2963 locations->SetInAt(0, Location::RequiresRegister()); 2964 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); 2965 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2966 break; 2967 } 2968 2969 case DataType::Type::kInt64: { 2970 locations->SetInAt(0, Location::RequiresRegister()); 2971 locations->SetInAt(1, Location::Any()); 2972 locations->SetOut(Location::SameAsFirstInput()); 2973 break; 2974 } 2975 2976 case DataType::Type::kFloat32: 2977 case DataType::Type::kFloat64: { 2978 locations->SetInAt(0, Location::RequiresFpuRegister()); 2979 if (add->InputAt(1)->IsX86LoadFromConstantTable()) { 2980 DCHECK(add->InputAt(1)->IsEmittedAtUseSite()); 2981 } else if (add->InputAt(1)->IsConstant()) { 2982 locations->SetInAt(1, Location::RequiresFpuRegister()); 2983 } else { 2984 locations->SetInAt(1, Location::Any()); 2985 } 2986 locations->SetOut(Location::SameAsFirstInput()); 2987 break; 2988 } 2989 2990 default: 2991 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 2992 UNREACHABLE(); 2993 } 2994 } 2995 2996 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { 2997 LocationSummary* locations = add->GetLocations(); 2998 Location first = locations->InAt(0); 2999 Location second = locations->InAt(1); 3000 Location out = locations->Out(); 3001 3002 switch (add->GetResultType()) { 3003 case DataType::Type::kInt32: { 3004 if (second.IsRegister()) { 3005 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3006 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>()); 3007 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) { 3008 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>()); 3009 } else { 3010 __ leal(out.AsRegister<Register>(), Address( 3011 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0)); 3012 } 3013 } else if (second.IsConstant()) { 3014 int32_t value = second.GetConstant()->AsIntConstant()->GetValue(); 3015 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3016 __ addl(out.AsRegister<Register>(), Immediate(value)); 3017 } else { 3018 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value)); 3019 } 3020 } else { 3021 DCHECK(first.Equals(locations->Out())); 3022 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); 3023 } 3024 break; 3025 } 3026 3027 case DataType::Type::kInt64: { 3028 if (second.IsRegisterPair()) { 3029 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); 3030 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); 3031 } else if (second.IsDoubleStackSlot()) { 3032 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); 3033 __ 
adcl(first.AsRegisterPairHigh<Register>(), 3034 Address(ESP, second.GetHighStackIndex(kX86WordSize))); 3035 } else { 3036 DCHECK(second.IsConstant()) << second; 3037 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3038 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value))); 3039 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value))); 3040 } 3041 break; 3042 } 3043 3044 case DataType::Type::kFloat32: { 3045 if (second.IsFpuRegister()) { 3046 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3047 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) { 3048 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable(); 3049 DCHECK(const_area->IsEmittedAtUseSite()); 3050 __ addss(first.AsFpuRegister<XmmRegister>(), 3051 codegen_->LiteralFloatAddress( 3052 const_area->GetConstant()->AsFloatConstant()->GetValue(), 3053 const_area->GetBaseMethodAddress(), 3054 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 3055 } else { 3056 DCHECK(second.IsStackSlot()); 3057 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); 3058 } 3059 break; 3060 } 3061 3062 case DataType::Type::kFloat64: { 3063 if (second.IsFpuRegister()) { 3064 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3065 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) { 3066 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable(); 3067 DCHECK(const_area->IsEmittedAtUseSite()); 3068 __ addsd(first.AsFpuRegister<XmmRegister>(), 3069 codegen_->LiteralDoubleAddress( 3070 const_area->GetConstant()->AsDoubleConstant()->GetValue(), 3071 const_area->GetBaseMethodAddress(), 3072 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 3073 } else { 3074 DCHECK(second.IsDoubleStackSlot()); 3075 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); 3076 } 3077 break; 3078 } 3079 3080 default: 3081 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 3082 } 3083 } 3084 3085 void LocationsBuilderX86::VisitSub(HSub* sub) { 3086 LocationSummary* locations = 3087 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall); 3088 switch (sub->GetResultType()) { 3089 case DataType::Type::kInt32: 3090 case DataType::Type::kInt64: { 3091 locations->SetInAt(0, Location::RequiresRegister()); 3092 locations->SetInAt(1, Location::Any()); 3093 locations->SetOut(Location::SameAsFirstInput()); 3094 break; 3095 } 3096 case DataType::Type::kFloat32: 3097 case DataType::Type::kFloat64: { 3098 locations->SetInAt(0, Location::RequiresFpuRegister()); 3099 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { 3100 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite()); 3101 } else if (sub->InputAt(1)->IsConstant()) { 3102 locations->SetInAt(1, Location::RequiresFpuRegister()); 3103 } else { 3104 locations->SetInAt(1, Location::Any()); 3105 } 3106 locations->SetOut(Location::SameAsFirstInput()); 3107 break; 3108 } 3109 3110 default: 3111 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 3112 } 3113 } 3114 3115 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { 3116 LocationSummary* locations = sub->GetLocations(); 3117 Location first = locations->InAt(0); 3118 Location second = locations->InAt(1); 3119 DCHECK(first.Equals(locations->Out())); 3120 switch (sub->GetResultType()) { 3121 case DataType::Type::kInt32: { 3122 if (second.IsRegister()) { 3123 __ 
subl(first.AsRegister<Register>(), second.AsRegister<Register>()); 3124 } else if (second.IsConstant()) { 3125 __ subl(first.AsRegister<Register>(), 3126 Immediate(second.GetConstant()->AsIntConstant()->GetValue())); 3127 } else { 3128 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); 3129 } 3130 break; 3131 } 3132 3133 case DataType::Type::kInt64: { 3134 if (second.IsRegisterPair()) { 3135 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); 3136 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); 3137 } else if (second.IsDoubleStackSlot()) { 3138 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); 3139 __ sbbl(first.AsRegisterPairHigh<Register>(), 3140 Address(ESP, second.GetHighStackIndex(kX86WordSize))); 3141 } else { 3142 DCHECK(second.IsConstant()) << second; 3143 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3144 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value))); 3145 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value))); 3146 } 3147 break; 3148 } 3149 3150 case DataType::Type::kFloat32: { 3151 if (second.IsFpuRegister()) { 3152 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3153 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { 3154 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable(); 3155 DCHECK(const_area->IsEmittedAtUseSite()); 3156 __ subss(first.AsFpuRegister<XmmRegister>(), 3157 codegen_->LiteralFloatAddress( 3158 const_area->GetConstant()->AsFloatConstant()->GetValue(), 3159 const_area->GetBaseMethodAddress(), 3160 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 3161 } else { 3162 DCHECK(second.IsStackSlot()); 3163 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); 3164 } 3165 break; 3166 } 3167 3168 case DataType::Type::kFloat64: { 3169 if (second.IsFpuRegister()) { 3170 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3171 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { 3172 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable(); 3173 DCHECK(const_area->IsEmittedAtUseSite()); 3174 __ subsd(first.AsFpuRegister<XmmRegister>(), 3175 codegen_->LiteralDoubleAddress( 3176 const_area->GetConstant()->AsDoubleConstant()->GetValue(), 3177 const_area->GetBaseMethodAddress(), 3178 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 3179 } else { 3180 DCHECK(second.IsDoubleStackSlot()); 3181 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); 3182 } 3183 break; 3184 } 3185 3186 default: 3187 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 3188 } 3189 } 3190 3191 void LocationsBuilderX86::VisitMul(HMul* mul) { 3192 LocationSummary* locations = 3193 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); 3194 switch (mul->GetResultType()) { 3195 case DataType::Type::kInt32: 3196 locations->SetInAt(0, Location::RequiresRegister()); 3197 locations->SetInAt(1, Location::Any()); 3198 if (mul->InputAt(1)->IsIntConstant()) { 3199 // Can use 3 operand multiply. 
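// (The three-operand form of imul, imul reg, reg/mem, imm, writes the product directly to its destination, so the output register does not need to alias the first input.)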
3200 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3201 } else { 3202 locations->SetOut(Location::SameAsFirstInput()); 3203 } 3204 break; 3205 case DataType::Type::kInt64: { 3206 locations->SetInAt(0, Location::RequiresRegister()); 3207 locations->SetInAt(1, Location::Any()); 3208 locations->SetOut(Location::SameAsFirstInput()); 3209 // Needed for imul on 32bits with 64bits output. 3210 locations->AddTemp(Location::RegisterLocation(EAX)); 3211 locations->AddTemp(Location::RegisterLocation(EDX)); 3212 break; 3213 } 3214 case DataType::Type::kFloat32: 3215 case DataType::Type::kFloat64: { 3216 locations->SetInAt(0, Location::RequiresFpuRegister()); 3217 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) { 3218 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite()); 3219 } else if (mul->InputAt(1)->IsConstant()) { 3220 locations->SetInAt(1, Location::RequiresFpuRegister()); 3221 } else { 3222 locations->SetInAt(1, Location::Any()); 3223 } 3224 locations->SetOut(Location::SameAsFirstInput()); 3225 break; 3226 } 3227 3228 default: 3229 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3230 } 3231 } 3232 3233 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { 3234 LocationSummary* locations = mul->GetLocations(); 3235 Location first = locations->InAt(0); 3236 Location second = locations->InAt(1); 3237 Location out = locations->Out(); 3238 3239 switch (mul->GetResultType()) { 3240 case DataType::Type::kInt32: 3241 // The constant may have ended up in a register, so test explicitly to avoid 3242 // problems where the output may not be the same as the first operand. 3243 if (mul->InputAt(1)->IsIntConstant()) { 3244 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue()); 3245 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm); 3246 } else if (second.IsRegister()) { 3247 DCHECK(first.Equals(out)); 3248 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>()); 3249 } else { 3250 DCHECK(second.IsStackSlot()); 3251 DCHECK(first.Equals(out)); 3252 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); 3253 } 3254 break; 3255 3256 case DataType::Type::kInt64: { 3257 Register in1_hi = first.AsRegisterPairHigh<Register>(); 3258 Register in1_lo = first.AsRegisterPairLow<Register>(); 3259 Register eax = locations->GetTemp(0).AsRegister<Register>(); 3260 Register edx = locations->GetTemp(1).AsRegister<Register>(); 3261 3262 DCHECK_EQ(EAX, eax); 3263 DCHECK_EQ(EDX, edx); 3264 3265 // input: in1 - 64 bits, in2 - 64 bits. 
3266 // output: in1 3267 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo 3268 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32] 3269 // parts: in1.lo = (in1.lo * in2.lo)[31:0] 3270 if (second.IsConstant()) { 3271 DCHECK(second.GetConstant()->IsLongConstant()); 3272 3273 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3274 int32_t low_value = Low32Bits(value); 3275 int32_t high_value = High32Bits(value); 3276 Immediate low(low_value); 3277 Immediate high(high_value); 3278 3279 __ movl(eax, high); 3280 // eax <- in1.lo * in2.hi 3281 __ imull(eax, in1_lo); 3282 // in1.hi <- in1.hi * in2.lo 3283 __ imull(in1_hi, low); 3284 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo 3285 __ addl(in1_hi, eax); 3286 // move in2_lo to eax to prepare for double precision 3287 __ movl(eax, low); 3288 // edx:eax <- in1.lo * in2.lo 3289 __ mull(in1_lo); 3290 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] 3291 __ addl(in1_hi, edx); 3292 // in1.lo <- (in1.lo * in2.lo)[31:0]; 3293 __ movl(in1_lo, eax); 3294 } else if (second.IsRegisterPair()) { 3295 Register in2_hi = second.AsRegisterPairHigh<Register>(); 3296 Register in2_lo = second.AsRegisterPairLow<Register>(); 3297 3298 __ movl(eax, in2_hi); 3299 // eax <- in1.lo * in2.hi 3300 __ imull(eax, in1_lo); 3301 // in1.hi <- in1.hi * in2.lo 3302 __ imull(in1_hi, in2_lo); 3303 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo 3304 __ addl(in1_hi, eax); 3305 // move in1_lo to eax to prepare for double precision 3306 __ movl(eax, in1_lo); 3307 // edx:eax <- in1.lo * in2.lo 3308 __ mull(in2_lo); 3309 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] 3310 __ addl(in1_hi, edx); 3311 // in1.lo <- (in1.lo * in2.lo)[31:0]; 3312 __ movl(in1_lo, eax); 3313 } else { 3314 DCHECK(second.IsDoubleStackSlot()) << second; 3315 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); 3316 Address in2_lo(ESP, second.GetStackIndex()); 3317 3318 __ movl(eax, in2_hi); 3319 // eax <- in1.lo * in2.hi 3320 __ imull(eax, in1_lo); 3321 // in1.hi <- in1.hi * in2.lo 3322 __ imull(in1_hi, in2_lo); 3323 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo 3324 __ addl(in1_hi, eax); 3325 // move in1_lo to eax to prepare for double precision 3326 __ movl(eax, in1_lo); 3327 // edx:eax <- in1.lo * in2.lo 3328 __ mull(in2_lo); 3329 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] 3330 __ addl(in1_hi, edx); 3331 // in1.lo <- (in1.lo * in2.lo)[31:0]; 3332 __ movl(in1_lo, eax); 3333 } 3334 3335 break; 3336 } 3337 3338 case DataType::Type::kFloat32: { 3339 DCHECK(first.Equals(locations->Out())); 3340 if (second.IsFpuRegister()) { 3341 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3342 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) { 3343 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable(); 3344 DCHECK(const_area->IsEmittedAtUseSite()); 3345 __ mulss(first.AsFpuRegister<XmmRegister>(), 3346 codegen_->LiteralFloatAddress( 3347 const_area->GetConstant()->AsFloatConstant()->GetValue(), 3348 const_area->GetBaseMethodAddress(), 3349 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 3350 } else { 3351 DCHECK(second.IsStackSlot()); 3352 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); 3353 } 3354 break; 3355 } 3356 3357 case DataType::Type::kFloat64: { 3358 DCHECK(first.Equals(locations->Out())); 3359 if (second.IsFpuRegister()) { 
3360 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3361 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) { 3362 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable(); 3363 DCHECK(const_area->IsEmittedAtUseSite()); 3364 __ mulsd(first.AsFpuRegister<XmmRegister>(), 3365 codegen_->LiteralDoubleAddress( 3366 const_area->GetConstant()->AsDoubleConstant()->GetValue(), 3367 const_area->GetBaseMethodAddress(), 3368 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 3369 } else { 3370 DCHECK(second.IsDoubleStackSlot()); 3371 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); 3372 } 3373 break; 3374 } 3375 3376 default: 3377 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3378 } 3379 } 3380 3381 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, 3382 uint32_t temp_offset, 3383 uint32_t stack_adjustment, 3384 bool is_fp, 3385 bool is_wide) { 3386 if (source.IsStackSlot()) { 3387 DCHECK(!is_wide); 3388 if (is_fp) { 3389 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); 3390 } else { 3391 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment)); 3392 } 3393 } else if (source.IsDoubleStackSlot()) { 3394 DCHECK(is_wide); 3395 if (is_fp) { 3396 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); 3397 } else { 3398 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment)); 3399 } 3400 } else { 3401 // Write the value to the temporary location on the stack and load to FP stack. 3402 if (!is_wide) { 3403 Location stack_temp = Location::StackSlot(temp_offset); 3404 codegen_->Move32(stack_temp, source); 3405 if (is_fp) { 3406 __ flds(Address(ESP, temp_offset)); 3407 } else { 3408 __ filds(Address(ESP, temp_offset)); 3409 } 3410 } else { 3411 Location stack_temp = Location::DoubleStackSlot(temp_offset); 3412 codegen_->Move64(stack_temp, source); 3413 if (is_fp) { 3414 __ fldl(Address(ESP, temp_offset)); 3415 } else { 3416 __ fildl(Address(ESP, temp_offset)); 3417 } 3418 } 3419 } 3420 } 3421 3422 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { 3423 DataType::Type type = rem->GetResultType(); 3424 bool is_float = type == DataType::Type::kFloat32; 3425 size_t elem_size = DataType::Size(type); 3426 LocationSummary* locations = rem->GetLocations(); 3427 Location first = locations->InAt(0); 3428 Location second = locations->InAt(1); 3429 Location out = locations->Out(); 3430 3431 // Create stack space for 2 elements. 3432 // TODO: enhance register allocator to ask for stack temporaries. 3433 __ subl(ESP, Immediate(2 * elem_size)); 3434 3435 // Load the values to the FP stack in reverse order, using temporaries if needed. 3436 const bool is_wide = !is_float; 3437 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide); 3438 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide); 3439 3440 // Loop doing FPREM until we stabilize. 3441 NearLabel retry; 3442 __ Bind(&retry); 3443 __ fprem(); 3444 3445 // Move FP status to AX. 3446 __ fstsw(); 3447 3448 // And see if the argument reduction is complete. This is signaled by the 3449 // C2 FPU flag bit set to 0. 3450 __ andl(EAX, Immediate(kC2ConditionMask)); 3451 __ j(kNotEqual, &retry); 3452 3453 // We have settled on the final value. Retrieve it into an XMM register. 3454 // Store FP top of stack to real stack. 
3455 if (is_float) { 3456 __ fsts(Address(ESP, 0)); 3457 } else { 3458 __ fstl(Address(ESP, 0)); 3459 } 3460 3461 // Pop the 2 items from the FP stack. 3462 __ fucompp(); 3463 3464 // Load the value from the stack into an XMM register. 3465 DCHECK(out.IsFpuRegister()) << out; 3466 if (is_float) { 3467 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); 3468 } else { 3469 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); 3470 } 3471 3472 // And remove the temporary stack space we allocated. 3473 __ addl(ESP, Immediate(2 * elem_size)); 3474 } 3475 3476 3477 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) { 3478 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3479 3480 LocationSummary* locations = instruction->GetLocations(); 3481 DCHECK(locations->InAt(1).IsConstant()); 3482 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant()); 3483 3484 Register out_register = locations->Out().AsRegister<Register>(); 3485 Register input_register = locations->InAt(0).AsRegister<Register>(); 3486 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 3487 3488 DCHECK(imm == 1 || imm == -1); 3489 3490 if (instruction->IsRem()) { 3491 __ xorl(out_register, out_register); 3492 } else { 3493 __ movl(out_register, input_register); 3494 if (imm == -1) { 3495 __ negl(out_register); 3496 } 3497 } 3498 } 3499 3500 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) { 3501 LocationSummary* locations = instruction->GetLocations(); 3502 Location second = locations->InAt(1); 3503 3504 Register out = locations->Out().AsRegister<Register>(); 3505 Register numerator = locations->InAt(0).AsRegister<Register>(); 3506 3507 int32_t imm = Int64FromConstant(second.GetConstant()); 3508 DCHECK(IsPowerOfTwo(AbsOrMin(imm))); 3509 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); 3510 3511 Register tmp = locations->GetTemp(0).AsRegister<Register>(); 3512 NearLabel done; 3513 __ movl(out, numerator); 3514 __ andl(out, Immediate(abs_imm-1)); 3515 __ j(Condition::kZero, &done); 3516 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); 3517 __ testl(numerator, numerator); 3518 __ cmovl(Condition::kLess, out, tmp); 3519 __ Bind(&done); 3520 } 3521 3522 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { 3523 LocationSummary* locations = instruction->GetLocations(); 3524 3525 Register out_register = locations->Out().AsRegister<Register>(); 3526 Register input_register = locations->InAt(0).AsRegister<Register>(); 3527 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 3528 DCHECK(IsPowerOfTwo(AbsOrMin(imm))); 3529 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); 3530 3531 Register num = locations->GetTemp(0).AsRegister<Register>(); 3532 3533 __ leal(num, Address(input_register, abs_imm - 1)); 3534 __ testl(input_register, input_register); 3535 __ cmovl(kGreaterEqual, num, input_register); 3536 int shift = CTZ(imm); 3537 __ sarl(num, Immediate(shift)); 3538 3539 if (imm < 0) { 3540 __ negl(num); 3541 } 3542 3543 __ movl(out_register, num); 3544 } 3545 3546 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { 3547 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3548 3549 LocationSummary* locations = instruction->GetLocations(); 3550 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 3551 3552 Register eax = locations->InAt(0).AsRegister<Register>(); 3553 Register out = 
locations->Out().AsRegister<Register>(); 3554 Register num; 3555 Register edx; 3556 3557 if (instruction->IsDiv()) { 3558 edx = locations->GetTemp(0).AsRegister<Register>(); 3559 num = locations->GetTemp(1).AsRegister<Register>(); 3560 } else { 3561 edx = locations->Out().AsRegister<Register>(); 3562 num = locations->GetTemp(0).AsRegister<Register>(); 3563 } 3564 3565 DCHECK_EQ(EAX, eax); 3566 DCHECK_EQ(EDX, edx); 3567 if (instruction->IsDiv()) { 3568 DCHECK_EQ(EAX, out); 3569 } else { 3570 DCHECK_EQ(EDX, out); 3571 } 3572 3573 int64_t magic; 3574 int shift; 3575 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift); 3576 3577 // Save the numerator. 3578 __ movl(num, eax); 3579 3580 // EAX = magic 3581 __ movl(eax, Immediate(magic)); 3582 3583 // EDX:EAX = magic * numerator 3584 __ imull(num); 3585 3586 if (imm > 0 && magic < 0) { 3587 // EDX += num 3588 __ addl(edx, num); 3589 } else if (imm < 0 && magic > 0) { 3590 __ subl(edx, num); 3591 } 3592 3593 // Shift if needed. 3594 if (shift != 0) { 3595 __ sarl(edx, Immediate(shift)); 3596 } 3597 3598 // EDX += 1 if EDX < 0 3599 __ movl(eax, edx); 3600 __ shrl(edx, Immediate(31)); 3601 __ addl(edx, eax); 3602 3603 if (instruction->IsRem()) { 3604 __ movl(eax, num); 3605 __ imull(edx, Immediate(imm)); 3606 __ subl(eax, edx); 3607 __ movl(edx, eax); 3608 } else { 3609 __ movl(eax, edx); 3610 } 3611 } 3612 3613 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) { 3614 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3615 3616 LocationSummary* locations = instruction->GetLocations(); 3617 Location out = locations->Out(); 3618 Location first = locations->InAt(0); 3619 Location second = locations->InAt(1); 3620 bool is_div = instruction->IsDiv(); 3621 3622 switch (instruction->GetResultType()) { 3623 case DataType::Type::kInt32: { 3624 DCHECK_EQ(EAX, first.AsRegister<Register>()); 3625 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>()); 3626 3627 if (second.IsConstant()) { 3628 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); 3629 3630 if (imm == 0) { 3631 // Do not generate anything for 0. DivZeroCheck would forbid any generated code. 3632 } else if (imm == 1 || imm == -1) { 3633 DivRemOneOrMinusOne(instruction); 3634 } else if (IsPowerOfTwo(AbsOrMin(imm))) { 3635 if (is_div) { 3636 DivByPowerOfTwo(instruction->AsDiv()); 3637 } else { 3638 RemByPowerOfTwo(instruction->AsRem()); 3639 } 3640 } else { 3641 DCHECK(imm <= -2 || imm >= 2); 3642 GenerateDivRemWithAnyConstant(instruction); 3643 } 3644 } else { 3645 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86( 3646 instruction, out.AsRegister<Register>(), is_div); 3647 codegen_->AddSlowPath(slow_path); 3648 3649 Register second_reg = second.AsRegister<Register>(); 3650 // 0x80000000/-1 triggers an arithmetic exception! 3651 // Dividing by -1 is actually negation and -0x80000000 = 0x80000000 so 3652 // it's safe to just use negl instead of more complex comparisons.
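// Illustrative note: the only quotient that overflows 32 bits is 0x80000000 / -1; negl leaves 0x80000000 unchanged, matching Java's Integer.MIN_VALUE / -1 == Integer.MIN_VALUE (with a remainder of 0).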
3653 3654 __ cmpl(second_reg, Immediate(-1)); 3655 __ j(kEqual, slow_path->GetEntryLabel()); 3656 3657 // edx:eax <- sign-extended of eax 3658 __ cdq(); 3659 // eax = quotient, edx = remainder 3660 __ idivl(second_reg); 3661 __ Bind(slow_path->GetExitLabel()); 3662 } 3663 break; 3664 } 3665 3666 case DataType::Type::kInt64: { 3667 InvokeRuntimeCallingConvention calling_convention; 3668 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>()); 3669 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>()); 3670 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>()); 3671 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>()); 3672 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>()); 3673 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>()); 3674 3675 if (is_div) { 3676 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc()); 3677 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); 3678 } else { 3679 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc()); 3680 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); 3681 } 3682 break; 3683 } 3684 3685 default: 3686 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType(); 3687 } 3688 } 3689 3690 void LocationsBuilderX86::VisitDiv(HDiv* div) { 3691 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64) 3692 ? LocationSummary::kCallOnMainOnly 3693 : LocationSummary::kNoCall; 3694 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind); 3695 3696 switch (div->GetResultType()) { 3697 case DataType::Type::kInt32: { 3698 locations->SetInAt(0, Location::RegisterLocation(EAX)); 3699 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); 3700 locations->SetOut(Location::SameAsFirstInput()); 3701 // Intel uses edx:eax as the dividend. 3702 locations->AddTemp(Location::RegisterLocation(EDX)); 3703 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way 3704 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as 3705 // output and request another temp. 3706 if (div->InputAt(1)->IsIntConstant()) { 3707 locations->AddTemp(Location::RequiresRegister()); 3708 } 3709 break; 3710 } 3711 case DataType::Type::kInt64: { 3712 InvokeRuntimeCallingConvention calling_convention; 3713 locations->SetInAt(0, Location::RegisterPairLocation( 3714 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); 3715 locations->SetInAt(1, Location::RegisterPairLocation( 3716 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); 3717 // Runtime helper puts the result in EAX, EDX. 
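// (The runtime helpers return int64_t, so per the native 32-bit x86 convention the value comes back in the EDX:EAX pair.)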
3718 locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); 3719 break; 3720 } 3721 case DataType::Type::kFloat32: 3722 case DataType::Type::kFloat64: { 3723 locations->SetInAt(0, Location::RequiresFpuRegister()); 3724 if (div->InputAt(1)->IsX86LoadFromConstantTable()) { 3725 DCHECK(div->InputAt(1)->IsEmittedAtUseSite()); 3726 } else if (div->InputAt(1)->IsConstant()) { 3727 locations->SetInAt(1, Location::RequiresFpuRegister()); 3728 } else { 3729 locations->SetInAt(1, Location::Any()); 3730 } 3731 locations->SetOut(Location::SameAsFirstInput()); 3732 break; 3733 } 3734 3735 default: 3736 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3737 } 3738 } 3739 3740 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { 3741 LocationSummary* locations = div->GetLocations(); 3742 Location first = locations->InAt(0); 3743 Location second = locations->InAt(1); 3744 3745 switch (div->GetResultType()) { 3746 case DataType::Type::kInt32: 3747 case DataType::Type::kInt64: { 3748 GenerateDivRemIntegral(div); 3749 break; 3750 } 3751 3752 case DataType::Type::kFloat32: { 3753 if (second.IsFpuRegister()) { 3754 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3755 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) { 3756 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable(); 3757 DCHECK(const_area->IsEmittedAtUseSite()); 3758 __ divss(first.AsFpuRegister<XmmRegister>(), 3759 codegen_->LiteralFloatAddress( 3760 const_area->GetConstant()->AsFloatConstant()->GetValue(), 3761 const_area->GetBaseMethodAddress(), 3762 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 3763 } else { 3764 DCHECK(second.IsStackSlot()); 3765 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); 3766 } 3767 break; 3768 } 3769 3770 case DataType::Type::kFloat64: { 3771 if (second.IsFpuRegister()) { 3772 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3773 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) { 3774 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable(); 3775 DCHECK(const_area->IsEmittedAtUseSite()); 3776 __ divsd(first.AsFpuRegister<XmmRegister>(), 3777 codegen_->LiteralDoubleAddress( 3778 const_area->GetConstant()->AsDoubleConstant()->GetValue(), 3779 const_area->GetBaseMethodAddress(), 3780 const_area->GetLocations()->InAt(0).AsRegister<Register>())); 3781 } else { 3782 DCHECK(second.IsDoubleStackSlot()); 3783 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); 3784 } 3785 break; 3786 } 3787 3788 default: 3789 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3790 } 3791 } 3792 3793 void LocationsBuilderX86::VisitRem(HRem* rem) { 3794 DataType::Type type = rem->GetResultType(); 3795 3796 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64) 3797 ? LocationSummary::kCallOnMainOnly 3798 : LocationSummary::kNoCall; 3799 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); 3800 3801 switch (type) { 3802 case DataType::Type::kInt32: { 3803 locations->SetInAt(0, Location::RegisterLocation(EAX)); 3804 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); 3805 locations->SetOut(Location::RegisterLocation(EDX)); 3806 // We need to save the numerator while we tweak eax and edx. 
As we are using imul in a way 3807 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as 3808 // output and request another temp. 3809 if (rem->InputAt(1)->IsIntConstant()) { 3810 locations->AddTemp(Location::RequiresRegister()); 3811 } 3812 break; 3813 } 3814 case DataType::Type::kInt64: { 3815 InvokeRuntimeCallingConvention calling_convention; 3816 locations->SetInAt(0, Location::RegisterPairLocation( 3817 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); 3818 locations->SetInAt(1, Location::RegisterPairLocation( 3819 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); 3820 // Runtime helper puts the result in EAX, EDX. 3821 locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); 3822 break; 3823 } 3824 case DataType::Type::kFloat64: 3825 case DataType::Type::kFloat32: { 3826 locations->SetInAt(0, Location::Any()); 3827 locations->SetInAt(1, Location::Any()); 3828 locations->SetOut(Location::RequiresFpuRegister()); 3829 locations->AddTemp(Location::RegisterLocation(EAX)); 3830 break; 3831 } 3832 3833 default: 3834 LOG(FATAL) << "Unexpected rem type " << type; 3835 } 3836 } 3837 3838 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { 3839 DataType::Type type = rem->GetResultType(); 3840 switch (type) { 3841 case DataType::Type::kInt32: 3842 case DataType::Type::kInt64: { 3843 GenerateDivRemIntegral(rem); 3844 break; 3845 } 3846 case DataType::Type::kFloat32: 3847 case DataType::Type::kFloat64: { 3848 GenerateRemFP(rem); 3849 break; 3850 } 3851 default: 3852 LOG(FATAL) << "Unexpected rem type " << type; 3853 } 3854 } 3855 3856 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { 3857 LocationSummary* locations = new (allocator) LocationSummary(minmax); 3858 switch (minmax->GetResultType()) { 3859 case DataType::Type::kInt32: 3860 locations->SetInAt(0, Location::RequiresRegister()); 3861 locations->SetInAt(1, Location::RequiresRegister()); 3862 locations->SetOut(Location::SameAsFirstInput()); 3863 break; 3864 case DataType::Type::kInt64: 3865 locations->SetInAt(0, Location::RequiresRegister()); 3866 locations->SetInAt(1, Location::RequiresRegister()); 3867 locations->SetOut(Location::SameAsFirstInput()); 3868 // Register to use to perform a long subtract to set cc. 3869 locations->AddTemp(Location::RequiresRegister()); 3870 break; 3871 case DataType::Type::kFloat32: 3872 locations->SetInAt(0, Location::RequiresFpuRegister()); 3873 locations->SetInAt(1, Location::RequiresFpuRegister()); 3874 locations->SetOut(Location::SameAsFirstInput()); 3875 locations->AddTemp(Location::RequiresRegister()); 3876 break; 3877 case DataType::Type::kFloat64: 3878 locations->SetInAt(0, Location::RequiresFpuRegister()); 3879 locations->SetInAt(1, Location::RequiresFpuRegister()); 3880 locations->SetOut(Location::SameAsFirstInput()); 3881 break; 3882 default: 3883 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); 3884 } 3885 } 3886 3887 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations, 3888 bool is_min, 3889 DataType::Type type) { 3890 Location op1_loc = locations->InAt(0); 3891 Location op2_loc = locations->InAt(1); 3892 3893 // Shortcut for same input locations. 3894 if (op1_loc.Equals(op2_loc)) { 3895 // Can return immediately, as op1_loc == out_loc. 3896 // Note: if we ever support separate registers, e.g., output into memory, we need to check for 3897 // a copy here. 
3898 DCHECK(locations->Out().Equals(op1_loc)); 3899 return; 3900 } 3901 3902 if (type == DataType::Type::kInt64) { 3903 // Need to perform a subtract to get the sign right. 3904 // op1 is already in the same location as the output. 3905 Location output = locations->Out(); 3906 Register output_lo = output.AsRegisterPairLow<Register>(); 3907 Register output_hi = output.AsRegisterPairHigh<Register>(); 3908 3909 Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); 3910 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); 3911 3912 // The comparison is performed by subtracting the second operand from 3913 // the first operand and then setting the status flags in the same 3914 // manner as the SUB instruction." 3915 __ cmpl(output_lo, op2_lo); 3916 3917 // Now use a temp and the borrow to finish the subtraction of op2_hi. 3918 Register temp = locations->GetTemp(0).AsRegister<Register>(); 3919 __ movl(temp, output_hi); 3920 __ sbbl(temp, op2_hi); 3921 3922 // Now the condition code is correct. 3923 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; 3924 __ cmovl(cond, output_lo, op2_lo); 3925 __ cmovl(cond, output_hi, op2_hi); 3926 } else { 3927 DCHECK_EQ(type, DataType::Type::kInt32); 3928 Register out = locations->Out().AsRegister<Register>(); 3929 Register op2 = op2_loc.AsRegister<Register>(); 3930 3931 // (out := op1) 3932 // out <=? op2 3933 // if out is min jmp done 3934 // out := op2 3935 // done: 3936 3937 __ cmpl(out, op2); 3938 Condition cond = is_min ? Condition::kGreater : Condition::kLess; 3939 __ cmovl(cond, out, op2); 3940 } 3941 } 3942 3943 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations, 3944 bool is_min, 3945 DataType::Type type) { 3946 Location op1_loc = locations->InAt(0); 3947 Location op2_loc = locations->InAt(1); 3948 Location out_loc = locations->Out(); 3949 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 3950 3951 // Shortcut for same input locations. 3952 if (op1_loc.Equals(op2_loc)) { 3953 DCHECK(out_loc.Equals(op1_loc)); 3954 return; 3955 } 3956 3957 // (out := op1) 3958 // out <=? op2 3959 // if Nan jmp Nan_label 3960 // if out is min jmp done 3961 // if op2 is min jmp op2_label 3962 // handle -0/+0 3963 // jmp done 3964 // Nan_label: 3965 // out := NaN 3966 // op2_label: 3967 // out := op2 3968 // done: 3969 // 3970 // This removes one jmp, but needs to copy one input (op1) to out. 3971 // 3972 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? 3973 3974 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); 3975 3976 NearLabel nan, done, op2_label; 3977 if (type == DataType::Type::kFloat64) { 3978 __ ucomisd(out, op2); 3979 } else { 3980 DCHECK_EQ(type, DataType::Type::kFloat32); 3981 __ ucomiss(out, op2); 3982 } 3983 3984 __ j(Condition::kParityEven, &nan); 3985 3986 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); 3987 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); 3988 3989 // Handle 0.0/-0.0. 3990 if (is_min) { 3991 if (type == DataType::Type::kFloat64) { 3992 __ orpd(out, op2); 3993 } else { 3994 __ orps(out, op2); 3995 } 3996 } else { 3997 if (type == DataType::Type::kFloat64) { 3998 __ andpd(out, op2); 3999 } else { 4000 __ andps(out, op2); 4001 } 4002 } 4003 __ jmp(&done); 4004 4005 // NaN handling. 4006 __ Bind(&nan); 4007 if (type == DataType::Type::kFloat64) { 4008 // TODO: Use a constant from the constant table (requires extra input). 
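// The canonical quiet-NaN bit pattern is materialized directly into the result register: min/max must return NaN whenever either input is NaN.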
4009 __ LoadLongConstant(out, kDoubleNaN); 4010 } else { 4011 Register constant = locations->GetTemp(0).AsRegister<Register>(); 4012 __ movl(constant, Immediate(kFloatNaN)); 4013 __ movd(out, constant); 4014 } 4015 __ jmp(&done); 4016 4017 // out := op2; 4018 __ Bind(&op2_label); 4019 if (type == DataType::Type::kFloat64) { 4020 __ movsd(out, op2); 4021 } else { 4022 __ movss(out, op2); 4023 } 4024 4025 // Done. 4026 __ Bind(&done); 4027 } 4028 4029 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { 4030 DataType::Type type = minmax->GetResultType(); 4031 switch (type) { 4032 case DataType::Type::kInt32: 4033 case DataType::Type::kInt64: 4034 GenerateMinMaxInt(minmax->GetLocations(), is_min, type); 4035 break; 4036 case DataType::Type::kFloat32: 4037 case DataType::Type::kFloat64: 4038 GenerateMinMaxFP(minmax->GetLocations(), is_min, type); 4039 break; 4040 default: 4041 LOG(FATAL) << "Unexpected type for HMinMax " << type; 4042 } 4043 } 4044 4045 void LocationsBuilderX86::VisitMin(HMin* min) { 4046 CreateMinMaxLocations(GetGraph()->GetAllocator(), min); 4047 } 4048 4049 void InstructionCodeGeneratorX86::VisitMin(HMin* min) { 4050 GenerateMinMax(min, /*is_min*/ true); 4051 } 4052 4053 void LocationsBuilderX86::VisitMax(HMax* max) { 4054 CreateMinMaxLocations(GetGraph()->GetAllocator(), max); 4055 } 4056 4057 void InstructionCodeGeneratorX86::VisitMax(HMax* max) { 4058 GenerateMinMax(max, /*is_min*/ false); 4059 } 4060 4061 void LocationsBuilderX86::VisitAbs(HAbs* abs) { 4062 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); 4063 switch (abs->GetResultType()) { 4064 case DataType::Type::kInt32: 4065 locations->SetInAt(0, Location::RegisterLocation(EAX)); 4066 locations->SetOut(Location::SameAsFirstInput()); 4067 locations->AddTemp(Location::RegisterLocation(EDX)); 4068 break; 4069 case DataType::Type::kInt64: 4070 locations->SetInAt(0, Location::RequiresRegister()); 4071 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 4072 locations->AddTemp(Location::RequiresRegister()); 4073 break; 4074 case DataType::Type::kFloat32: 4075 locations->SetInAt(0, Location::RequiresFpuRegister()); 4076 locations->SetOut(Location::SameAsFirstInput()); 4077 locations->AddTemp(Location::RequiresFpuRegister()); 4078 locations->AddTemp(Location::RequiresRegister()); 4079 break; 4080 case DataType::Type::kFloat64: 4081 locations->SetInAt(0, Location::RequiresFpuRegister()); 4082 locations->SetOut(Location::SameAsFirstInput()); 4083 locations->AddTemp(Location::RequiresFpuRegister()); 4084 break; 4085 default: 4086 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); 4087 } 4088 } 4089 4090 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) { 4091 LocationSummary* locations = abs->GetLocations(); 4092 switch (abs->GetResultType()) { 4093 case DataType::Type::kInt32: { 4094 Register out = locations->Out().AsRegister<Register>(); 4095 DCHECK_EQ(out, EAX); 4096 Register temp = locations->GetTemp(0).AsRegister<Register>(); 4097 DCHECK_EQ(temp, EDX); 4098 // Sign extend EAX into EDX. 4099 __ cdq(); 4100 // XOR EAX with sign. 4101 __ xorl(EAX, EDX); 4102 // Subtract out sign to correct. 4103 __ subl(EAX, EDX); 4104 // The result is in EAX. 
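// Worked example (illustrative): for EAX = -5, cdq sets EDX = -1; -5 ^ -1 == 4 and 4 - (-1) == 5. For non-negative inputs EDX is 0 and the xor/sub pair is a no-op.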
4105 break; 4106 } 4107 case DataType::Type::kInt64: { 4108 Location input = locations->InAt(0); 4109 Register input_lo = input.AsRegisterPairLow<Register>(); 4110 Register input_hi = input.AsRegisterPairHigh<Register>(); 4111 Location output = locations->Out(); 4112 Register output_lo = output.AsRegisterPairLow<Register>(); 4113 Register output_hi = output.AsRegisterPairHigh<Register>(); 4114 Register temp = locations->GetTemp(0).AsRegister<Register>(); 4115 // Compute the sign into the temporary. 4116 __ movl(temp, input_hi); 4117 __ sarl(temp, Immediate(31)); 4118 // Store the sign into the output. 4119 __ movl(output_lo, temp); 4120 __ movl(output_hi, temp); 4121 // XOR the input to the output. 4122 __ xorl(output_lo, input_lo); 4123 __ xorl(output_hi, input_hi); 4124 // Subtract the sign. 4125 __ subl(output_lo, temp); 4126 __ sbbl(output_hi, temp); 4127 break; 4128 } 4129 case DataType::Type::kFloat32: { 4130 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 4131 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 4132 Register constant = locations->GetTemp(1).AsRegister<Register>(); 4133 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF))); 4134 __ movd(temp, constant); 4135 __ andps(out, temp); 4136 break; 4137 } 4138 case DataType::Type::kFloat64: { 4139 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 4140 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 4141 // TODO: Use a constant from the constant table (requires extra input). 4142 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF)); 4143 __ andpd(out, temp); 4144 break; 4145 } 4146 default: 4147 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); 4148 } 4149 } 4150 4151 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { 4152 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 4153 switch (instruction->GetType()) { 4154 case DataType::Type::kBool: 4155 case DataType::Type::kUint8: 4156 case DataType::Type::kInt8: 4157 case DataType::Type::kUint16: 4158 case DataType::Type::kInt16: 4159 case DataType::Type::kInt32: { 4160 locations->SetInAt(0, Location::Any()); 4161 break; 4162 } 4163 case DataType::Type::kInt64: { 4164 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); 4165 if (!instruction->IsConstant()) { 4166 locations->AddTemp(Location::RequiresRegister()); 4167 } 4168 break; 4169 } 4170 default: 4171 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType(); 4172 } 4173 } 4174 4175 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { 4176 SlowPathCode* slow_path = 4177 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction); 4178 codegen_->AddSlowPath(slow_path); 4179 4180 LocationSummary* locations = instruction->GetLocations(); 4181 Location value = locations->InAt(0); 4182 4183 switch (instruction->GetType()) { 4184 case DataType::Type::kBool: 4185 case DataType::Type::kUint8: 4186 case DataType::Type::kInt8: 4187 case DataType::Type::kUint16: 4188 case DataType::Type::kInt16: 4189 case DataType::Type::kInt32: { 4190 if (value.IsRegister()) { 4191 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>()); 4192 __ j(kEqual, slow_path->GetEntryLabel()); 4193 } else if (value.IsStackSlot()) { 4194 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0)); 4195 __ j(kEqual, slow_path->GetEntryLabel()); 4196 } else { 4197 DCHECK(value.IsConstant()) << value; 4198 if 
(value.GetConstant()->AsIntConstant()->GetValue() == 0) { 4199 __ jmp(slow_path->GetEntryLabel()); 4200 } 4201 } 4202 break; 4203 } 4204 case DataType::Type::kInt64: { 4205 if (value.IsRegisterPair()) { 4206 Register temp = locations->GetTemp(0).AsRegister<Register>(); 4207 __ movl(temp, value.AsRegisterPairLow<Register>()); 4208 __ orl(temp, value.AsRegisterPairHigh<Register>()); 4209 __ j(kEqual, slow_path->GetEntryLabel()); 4210 } else { 4211 DCHECK(value.IsConstant()) << value; 4212 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) { 4213 __ jmp(slow_path->GetEntryLabel()); 4214 } 4215 } 4216 break; 4217 } 4218 default: 4219 LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType(); 4220 } 4221 } 4222 4223 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { 4224 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 4225 4226 LocationSummary* locations = 4227 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); 4228 4229 switch (op->GetResultType()) { 4230 case DataType::Type::kInt32: 4231 case DataType::Type::kInt64: { 4232 // Can't have Location::Any() and output SameAsFirstInput() 4233 locations->SetInAt(0, Location::RequiresRegister()); 4234 // The shift count needs to be in CL or a constant. 4235 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); 4236 locations->SetOut(Location::SameAsFirstInput()); 4237 break; 4238 } 4239 default: 4240 LOG(FATAL) << "Unexpected op type " << op->GetResultType(); 4241 } 4242 } 4243 4244 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { 4245 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 4246 4247 LocationSummary* locations = op->GetLocations(); 4248 Location first = locations->InAt(0); 4249 Location second = locations->InAt(1); 4250 DCHECK(first.Equals(locations->Out())); 4251 4252 switch (op->GetResultType()) { 4253 case DataType::Type::kInt32: { 4254 DCHECK(first.IsRegister()); 4255 Register first_reg = first.AsRegister<Register>(); 4256 if (second.IsRegister()) { 4257 Register second_reg = second.AsRegister<Register>(); 4258 DCHECK_EQ(ECX, second_reg); 4259 if (op->IsShl()) { 4260 __ shll(first_reg, second_reg); 4261 } else if (op->IsShr()) { 4262 __ sarl(first_reg, second_reg); 4263 } else { 4264 __ shrl(first_reg, second_reg); 4265 } 4266 } else { 4267 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance; 4268 if (shift == 0) { 4269 return; 4270 } 4271 Immediate imm(shift); 4272 if (op->IsShl()) { 4273 __ shll(first_reg, imm); 4274 } else if (op->IsShr()) { 4275 __ sarl(first_reg, imm); 4276 } else { 4277 __ shrl(first_reg, imm); 4278 } 4279 } 4280 break; 4281 } 4282 case DataType::Type::kInt64: { 4283 if (second.IsRegister()) { 4284 Register second_reg = second.AsRegister<Register>(); 4285 DCHECK_EQ(ECX, second_reg); 4286 if (op->IsShl()) { 4287 GenerateShlLong(first, second_reg); 4288 } else if (op->IsShr()) { 4289 GenerateShrLong(first, second_reg); 4290 } else { 4291 GenerateUShrLong(first, second_reg); 4292 } 4293 } else { 4294 // Shift by a constant. 4295 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance; 4296 // Nothing to do if the shift is 0, as the input is already the output. 
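// (The masking with kMaxLongShiftDistance above mirrors the Java/dex rule that long shift distances are taken modulo 64.)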
4297 if (shift != 0) { 4298 if (op->IsShl()) { 4299 GenerateShlLong(first, shift); 4300 } else if (op->IsShr()) { 4301 GenerateShrLong(first, shift); 4302 } else { 4303 GenerateUShrLong(first, shift); 4304 } 4305 } 4306 } 4307 break; 4308 } 4309 default: 4310 LOG(FATAL) << "Unexpected op type " << op->GetResultType(); 4311 } 4312 } 4313 4314 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { 4315 Register low = loc.AsRegisterPairLow<Register>(); 4316 Register high = loc.AsRegisterPairHigh<Register>(); 4317 if (shift == 1) { 4318 // This is just an addition. 4319 __ addl(low, low); 4320 __ adcl(high, high); 4321 } else if (shift == 32) { 4322 // Shift by 32 is easy. High gets low, and low gets 0. 4323 codegen_->EmitParallelMoves( 4324 loc.ToLow(), 4325 loc.ToHigh(), 4326 DataType::Type::kInt32, 4327 Location::ConstantLocation(GetGraph()->GetIntConstant(0)), 4328 loc.ToLow(), 4329 DataType::Type::kInt32); 4330 } else if (shift > 32) { 4331 // Low part becomes 0. High part is low part << (shift-32). 4332 __ movl(high, low); 4333 __ shll(high, Immediate(shift - 32)); 4334 __ xorl(low, low); 4335 } else { 4336 // Between 1 and 31. 4337 __ shld(high, low, Immediate(shift)); 4338 __ shll(low, Immediate(shift)); 4339 } 4340 } 4341 4342 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { 4343 NearLabel done; 4344 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); 4345 __ shll(loc.AsRegisterPairLow<Register>(), shifter); 4346 __ testl(shifter, Immediate(32)); 4347 __ j(kEqual, &done); 4348 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>()); 4349 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0)); 4350 __ Bind(&done); 4351 } 4352 4353 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { 4354 Register low = loc.AsRegisterPairLow<Register>(); 4355 Register high = loc.AsRegisterPairHigh<Register>(); 4356 if (shift == 32) { 4357 // Need to copy the sign. 4358 DCHECK_NE(low, high); 4359 __ movl(low, high); 4360 __ sarl(high, Immediate(31)); 4361 } else if (shift > 32) { 4362 DCHECK_NE(low, high); 4363 // High part becomes sign. Low part is shifted by shift - 32. 4364 __ movl(low, high); 4365 __ sarl(high, Immediate(31)); 4366 __ sarl(low, Immediate(shift - 32)); 4367 } else { 4368 // Between 1 and 31. 4369 __ shrd(low, high, Immediate(shift)); 4370 __ sarl(high, Immediate(shift)); 4371 } 4372 } 4373 4374 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { 4375 NearLabel done; 4376 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); 4377 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter); 4378 __ testl(shifter, Immediate(32)); 4379 __ j(kEqual, &done); 4380 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>()); 4381 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31)); 4382 __ Bind(&done); 4383 } 4384 4385 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { 4386 Register low = loc.AsRegisterPairLow<Register>(); 4387 Register high = loc.AsRegisterPairHigh<Register>(); 4388 if (shift == 32) { 4389 // Shift by 32 is easy. Low gets high, and high gets 0. 
4390 codegen_->EmitParallelMoves( 4391 loc.ToHigh(), 4392 loc.ToLow(), 4393 DataType::Type::kInt32, 4394 Location::ConstantLocation(GetGraph()->GetIntConstant(0)), 4395 loc.ToHigh(), 4396 DataType::Type::kInt32); 4397 } else if (shift > 32) { 4398 // Low part is high >> (shift - 32). High part becomes 0. 4399 __ movl(low, high); 4400 __ shrl(low, Immediate(shift - 32)); 4401 __ xorl(high, high); 4402 } else { 4403 // Between 1 and 31. 4404 __ shrd(low, high, Immediate(shift)); 4405 __ shrl(high, Immediate(shift)); 4406 } 4407 } 4408 4409 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { 4410 NearLabel done; 4411 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); 4412 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter); 4413 __ testl(shifter, Immediate(32)); 4414 __ j(kEqual, &done); 4415 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>()); 4416 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0)); 4417 __ Bind(&done); 4418 } 4419 4420 void LocationsBuilderX86::VisitRor(HRor* ror) { 4421 LocationSummary* locations = 4422 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); 4423 4424 switch (ror->GetResultType()) { 4425 case DataType::Type::kInt64: 4426 // Add the temporary needed. 4427 locations->AddTemp(Location::RequiresRegister()); 4428 FALLTHROUGH_INTENDED; 4429 case DataType::Type::kInt32: 4430 locations->SetInAt(0, Location::RequiresRegister()); 4431 // The shift count needs to be in CL (unless it is a constant). 4432 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1))); 4433 locations->SetOut(Location::SameAsFirstInput()); 4434 break; 4435 default: 4436 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); 4437 UNREACHABLE(); 4438 } 4439 } 4440 4441 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) { 4442 LocationSummary* locations = ror->GetLocations(); 4443 Location first = locations->InAt(0); 4444 Location second = locations->InAt(1); 4445 4446 if (ror->GetResultType() == DataType::Type::kInt32) { 4447 Register first_reg = first.AsRegister<Register>(); 4448 if (second.IsRegister()) { 4449 Register second_reg = second.AsRegister<Register>(); 4450 __ rorl(first_reg, second_reg); 4451 } else { 4452 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance); 4453 __ rorl(first_reg, imm); 4454 } 4455 return; 4456 } 4457 4458 DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64); 4459 Register first_reg_lo = first.AsRegisterPairLow<Register>(); 4460 Register first_reg_hi = first.AsRegisterPairHigh<Register>(); 4461 Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); 4462 if (second.IsRegister()) { 4463 Register second_reg = second.AsRegister<Register>(); 4464 DCHECK_EQ(second_reg, ECX); 4465 __ movl(temp_reg, first_reg_hi); 4466 __ shrd(first_reg_hi, first_reg_lo, second_reg); 4467 __ shrd(first_reg_lo, temp_reg, second_reg); 4468 __ movl(temp_reg, first_reg_hi); 4469 __ testl(second_reg, Immediate(32)); 4470 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo); 4471 __ cmovl(kNotEqual, first_reg_lo, temp_reg); 4472 } else { 4473 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance; 4474 if (shift_amt == 0) { 4475 // Already fine. 4476 return; 4477 } 4478 if (shift_amt == 32) { 4479 // Just swap. 
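// Rotating a 64-bit value by 32 simply exchanges its two 32-bit halves.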
4480 __ movl(temp_reg, first_reg_lo); 4481 __ movl(first_reg_lo, first_reg_hi); 4482 __ movl(first_reg_hi, temp_reg); 4483 return; 4484 } 4485 4486 Immediate imm(shift_amt); 4487 // Save the contents of the low register. 4488 __ movl(temp_reg, first_reg_lo); 4489 4490 // Shift right into low, feeding bits from high. 4491 __ shrd(first_reg_lo, first_reg_hi, imm); 4492 4493 // Shift right into high, feeding bits from the original low. 4494 __ shrd(first_reg_hi, temp_reg, imm); 4495 4496 // Swap if needed. 4497 if (shift_amt > 32) { 4498 __ movl(temp_reg, first_reg_lo); 4499 __ movl(first_reg_lo, first_reg_hi); 4500 __ movl(first_reg_hi, temp_reg); 4501 } 4502 } 4503 } 4504 4505 void LocationsBuilderX86::VisitShl(HShl* shl) { 4506 HandleShift(shl); 4507 } 4508 4509 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) { 4510 HandleShift(shl); 4511 } 4512 4513 void LocationsBuilderX86::VisitShr(HShr* shr) { 4514 HandleShift(shr); 4515 } 4516 4517 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) { 4518 HandleShift(shr); 4519 } 4520 4521 void LocationsBuilderX86::VisitUShr(HUShr* ushr) { 4522 HandleShift(ushr); 4523 } 4524 4525 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) { 4526 HandleShift(ushr); 4527 } 4528 4529 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { 4530 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 4531 instruction, LocationSummary::kCallOnMainOnly); 4532 locations->SetOut(Location::RegisterLocation(EAX)); 4533 InvokeRuntimeCallingConvention calling_convention; 4534 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 4535 } 4536 4537 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { 4538 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); 4539 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 4540 DCHECK(!codegen_->IsLeafMethod()); 4541 } 4542 4543 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { 4544 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 4545 instruction, LocationSummary::kCallOnMainOnly); 4546 locations->SetOut(Location::RegisterLocation(EAX)); 4547 InvokeRuntimeCallingConvention calling_convention; 4548 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 4549 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 4550 } 4551 4552 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { 4553 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4554 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); 4555 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); 4556 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); 4557 DCHECK(!codegen_->IsLeafMethod()); 4558 } 4559 4560 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) { 4561 LocationSummary* locations = 4562 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4563 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 4564 if (location.IsStackSlot()) { 4565 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 4566 } else if (location.IsDoubleStackSlot()) { 4567 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 4568 } 4569 locations->SetOut(location); 4570 } 4571 4572 void InstructionCodeGeneratorX86::VisitParameterValue( 4573 HParameterValue* instruction ATTRIBUTE_UNUSED) { 4574 } 4575 4576 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { 4577 LocationSummary* locations = 4578 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4579 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); 4580 } 4581 4582 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 4583 } 4584 4585 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) { 4586 LocationSummary* locations = 4587 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4588 locations->SetInAt(0, Location::RequiresRegister()); 4589 locations->SetOut(Location::RequiresRegister()); 4590 } 4591 4592 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) { 4593 LocationSummary* locations = instruction->GetLocations(); 4594 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { 4595 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 4596 instruction->GetIndex(), kX86PointerSize).SizeValue(); 4597 __ movl(locations->Out().AsRegister<Register>(), 4598 Address(locations->InAt(0).AsRegister<Register>(), method_offset)); 4599 } else { 4600 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 4601 instruction->GetIndex(), kX86PointerSize)); 4602 __ movl(locations->Out().AsRegister<Register>(), 4603 Address(locations->InAt(0).AsRegister<Register>(), 4604 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value())); 4605 // temp = temp->GetImtEntryAt(method_offset); 4606 __ movl(locations->Out().AsRegister<Register>(), 4607 Address(locations->Out().AsRegister<Register>(), method_offset)); 4608 } 4609 } 4610 4611 void LocationsBuilderX86::VisitNot(HNot* not_) { 4612 LocationSummary* locations = 4613 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall); 4614 locations->SetInAt(0, Location::RequiresRegister()); 4615 locations->SetOut(Location::SameAsFirstInput()); 4616 } 4617 4618 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) { 4619 LocationSummary* locations = not_->GetLocations(); 4620 Location in = locations->InAt(0); 4621 Location out = locations->Out(); 4622 DCHECK(in.Equals(out)); 4623 switch (not_->GetResultType()) { 4624 case DataType::Type::kInt32: 4625 __ notl(out.AsRegister<Register>()); 4626 break; 4627 4628 case DataType::Type::kInt64: 4629 __ notl(out.AsRegisterPairLow<Register>()); 4630 __ 
notl(out.AsRegisterPairHigh<Register>()); 4631 break; 4632 4633 default: 4634 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType(); 4635 } 4636 } 4637 4638 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) { 4639 LocationSummary* locations = 4640 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall); 4641 locations->SetInAt(0, Location::RequiresRegister()); 4642 locations->SetOut(Location::SameAsFirstInput()); 4643 } 4644 4645 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) { 4646 LocationSummary* locations = bool_not->GetLocations(); 4647 Location in = locations->InAt(0); 4648 Location out = locations->Out(); 4649 DCHECK(in.Equals(out)); 4650 __ xorl(out.AsRegister<Register>(), Immediate(1)); 4651 } 4652 4653 void LocationsBuilderX86::VisitCompare(HCompare* compare) { 4654 LocationSummary* locations = 4655 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); 4656 switch (compare->InputAt(0)->GetType()) { 4657 case DataType::Type::kBool: 4658 case DataType::Type::kUint8: 4659 case DataType::Type::kInt8: 4660 case DataType::Type::kUint16: 4661 case DataType::Type::kInt16: 4662 case DataType::Type::kInt32: 4663 case DataType::Type::kInt64: { 4664 locations->SetInAt(0, Location::RequiresRegister()); 4665 locations->SetInAt(1, Location::Any()); 4666 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4667 break; 4668 } 4669 case DataType::Type::kFloat32: 4670 case DataType::Type::kFloat64: { 4671 locations->SetInAt(0, Location::RequiresFpuRegister()); 4672 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) { 4673 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite()); 4674 } else if (compare->InputAt(1)->IsConstant()) { 4675 locations->SetInAt(1, Location::RequiresFpuRegister()); 4676 } else { 4677 locations->SetInAt(1, Location::Any()); 4678 } 4679 locations->SetOut(Location::RequiresRegister()); 4680 break; 4681 } 4682 default: 4683 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); 4684 } 4685 } 4686 4687 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { 4688 LocationSummary* locations = compare->GetLocations(); 4689 Register out = locations->Out().AsRegister<Register>(); 4690 Location left = locations->InAt(0); 4691 Location right = locations->InAt(1); 4692 4693 NearLabel less, greater, done; 4694 Condition less_cond = kLess; 4695 4696 switch (compare->InputAt(0)->GetType()) { 4697 case DataType::Type::kBool: 4698 case DataType::Type::kUint8: 4699 case DataType::Type::kInt8: 4700 case DataType::Type::kUint16: 4701 case DataType::Type::kInt16: 4702 case DataType::Type::kInt32: { 4703 codegen_->GenerateIntCompare(left, right); 4704 break; 4705 } 4706 case DataType::Type::kInt64: { 4707 Register left_low = left.AsRegisterPairLow<Register>(); 4708 Register left_high = left.AsRegisterPairHigh<Register>(); 4709 int32_t val_low = 0; 4710 int32_t val_high = 0; 4711 bool right_is_const = false; 4712 4713 if (right.IsConstant()) { 4714 DCHECK(right.GetConstant()->IsLongConstant()); 4715 right_is_const = true; 4716 int64_t val = right.GetConstant()->AsLongConstant()->GetValue(); 4717 val_low = Low32Bits(val); 4718 val_high = High32Bits(val); 4719 } 4720 4721 if (right.IsRegisterPair()) { 4722 __ cmpl(left_high, right.AsRegisterPairHigh<Register>()); 4723 } else if (right.IsDoubleStackSlot()) { 4724 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize))); 4725 } else { 4726 
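      // Constant right operand: the upper 32 bits of the long constant are
      // compared here; the register and double-stack-slot cases above compare
      // the high words directly with cmpl. (Descriptive note only.)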
DCHECK(right_is_const) << right; 4727 codegen_->Compare32BitValue(left_high, val_high); 4728 } 4729 __ j(kLess, &less); // Signed compare. 4730 __ j(kGreater, &greater); // Signed compare. 4731 if (right.IsRegisterPair()) { 4732 __ cmpl(left_low, right.AsRegisterPairLow<Register>()); 4733 } else if (right.IsDoubleStackSlot()) { 4734 __ cmpl(left_low, Address(ESP, right.GetStackIndex())); 4735 } else { 4736 DCHECK(right_is_const) << right; 4737 codegen_->Compare32BitValue(left_low, val_low); 4738 } 4739 less_cond = kBelow; // for CF (unsigned). 4740 break; 4741 } 4742 case DataType::Type::kFloat32: { 4743 GenerateFPCompare(left, right, compare, false); 4744 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 4745 less_cond = kBelow; // for CF (floats). 4746 break; 4747 } 4748 case DataType::Type::kFloat64: { 4749 GenerateFPCompare(left, right, compare, true); 4750 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 4751 less_cond = kBelow; // for CF (floats). 4752 break; 4753 } 4754 default: 4755 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); 4756 } 4757 4758 __ movl(out, Immediate(0)); 4759 __ j(kEqual, &done); 4760 __ j(less_cond, &less); 4761 4762 __ Bind(&greater); 4763 __ movl(out, Immediate(1)); 4764 __ jmp(&done); 4765 4766 __ Bind(&less); 4767 __ movl(out, Immediate(-1)); 4768 4769 __ Bind(&done); 4770 } 4771 4772 void LocationsBuilderX86::VisitPhi(HPhi* instruction) { 4773 LocationSummary* locations = 4774 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4775 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { 4776 locations->SetInAt(i, Location::Any()); 4777 } 4778 locations->SetOut(Location::Any()); 4779 } 4780 4781 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 4782 LOG(FATAL) << "Unreachable"; 4783 } 4784 4785 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { 4786 /* 4787 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. 4788 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. 4789 * For those cases, all we need to ensure is that there is a scheduling barrier in place. 4790 */ 4791 switch (kind) { 4792 case MemBarrierKind::kAnyAny: { 4793 MemoryFence(); 4794 break; 4795 } 4796 case MemBarrierKind::kAnyStore: 4797 case MemBarrierKind::kLoadAny: 4798 case MemBarrierKind::kStoreStore: { 4799 // nop 4800 break; 4801 } 4802 case MemBarrierKind::kNTStoreStore: 4803 // Non-Temporal Store/Store needs an explicit fence. 4804 MemoryFence(/* non-temporal= */ true); 4805 break; 4806 } 4807 } 4808 4809 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( 4810 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 4811 ArtMethod* method ATTRIBUTE_UNUSED) { 4812 return desired_dispatch_info; 4813 } 4814 4815 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, 4816 Register temp) { 4817 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); 4818 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); 4819 if (!invoke->GetLocations()->Intrinsified()) { 4820 return location.AsRegister<Register>(); 4821 } 4822 // For intrinsics we allow any location, so it may be on the stack. 
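  // Summary of the cases handled below (descriptive only): a stack-slot location is
  // reloaded into `temp`; a register that the intrinsic slow path has spilled is
  // reloaded from its save slot; otherwise the register is used as-is.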
4823   if (!location.IsRegister()) {
4824     __ movl(temp, Address(ESP, location.GetStackIndex()));
4825     return temp;
4826   }
4827   // For register locations, check if the register was saved. If so, get it from the stack.
4828   // Note: There is a chance that the register was saved but not overwritten, so we could
4829   // save one load. However, since this is just an intrinsic slow path we prefer this
4830   // simple and more robust approach rather than trying to determine if that's the case.
4831   SlowPathCode* slow_path = GetCurrentSlowPath();
4832   DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
4833   if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
4834     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
4835     __ movl(temp, Address(ESP, stack_offset));
4836     return temp;
4837   }
4838   return location.AsRegister<Register>();
4839 }
4840 
4841 void CodeGeneratorX86::GenerateStaticOrDirectCall(
4842     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4843   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4844   switch (invoke->GetMethodLoadKind()) {
4845     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4846       // temp = thread->string_init_entrypoint
4847       uint32_t offset =
4848           GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4849       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
4850       break;
4851     }
4852     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4853       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4854       break;
4855     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
4856       DCHECK(GetCompilerOptions().IsBootImage());
4857       Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4858                                                                 temp.AsRegister<Register>());
4859       __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
4860       RecordBootImageMethodPatch(invoke);
4861       break;
4862     }
4863     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
4864       Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4865                                                                 temp.AsRegister<Register>());
4866       __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4867       RecordBootImageRelRoPatch(
4868           invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
4869           GetBootImageOffset(invoke));
4870       break;
4871     }
4872     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
4873       Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4874                                                                 temp.AsRegister<Register>());
4875       __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4876       RecordMethodBssEntryPatch(invoke);
4877       break;
4878     }
4879     case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
4880       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
4881       break;
4882     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
4883       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4884       return;  // No code pointer retrieval; the runtime performs the call directly.
4885 } 4886 } 4887 4888 switch (invoke->GetCodePtrLocation()) { 4889 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: 4890 __ call(GetFrameEntryLabel()); 4891 break; 4892 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: 4893 // (callee_method + offset_of_quick_compiled_code)() 4894 __ call(Address(callee_method.AsRegister<Register>(), 4895 ArtMethod::EntryPointFromQuickCompiledCodeOffset( 4896 kX86PointerSize).Int32Value())); 4897 break; 4898 } 4899 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4900 4901 DCHECK(!IsLeafMethod()); 4902 } 4903 4904 void CodeGeneratorX86::GenerateVirtualCall( 4905 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { 4906 Register temp = temp_in.AsRegister<Register>(); 4907 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 4908 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value(); 4909 4910 // Use the calling convention instead of the location of the receiver, as 4911 // intrinsics may have put the receiver in a different register. In the intrinsics 4912 // slow path, the arguments have been moved to the right place, so here we are 4913 // guaranteed that the receiver is the first register of the calling convention. 4914 InvokeDexCallingConvention calling_convention; 4915 Register receiver = calling_convention.GetRegisterAt(0); 4916 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 4917 // /* HeapReference<Class> */ temp = receiver->klass_ 4918 __ movl(temp, Address(receiver, class_offset)); 4919 MaybeRecordImplicitNullCheck(invoke); 4920 // Instead of simply (possibly) unpoisoning `temp` here, we should 4921 // emit a read barrier for the previous class reference load. 4922 // However this is not required in practice, as this is an 4923 // intermediate/temporary reference and because the current 4924 // concurrent copying collector keeps the from-space memory 4925 // intact/accessible until the end of the marking phase (the 4926 // concurrent copying collector may not in the future). 
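  // The dispatch sequence emitted by this function is roughly (pseudo-assembly,
  // ignoring poisoning and PC info recording):
  //   movl temp, [receiver + class_offset]   // temp = receiver->klass_
  //   movl temp, [temp + method_offset]      // temp = vtable entry (ArtMethod*)
  //   call [temp + entry_point_from_quick_compiled_code_offset]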
4927 __ MaybeUnpoisonHeapReference(temp); 4928 // temp = temp->GetMethodAt(method_offset); 4929 __ movl(temp, Address(temp, method_offset)); 4930 // call temp->GetEntryPoint(); 4931 __ call(Address( 4932 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value())); 4933 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4934 } 4935 4936 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address, 4937 uint32_t intrinsic_data) { 4938 boot_image_intrinsic_patches_.emplace_back( 4939 method_address, /* target_dex_file= */ nullptr, intrinsic_data); 4940 __ Bind(&boot_image_intrinsic_patches_.back().label); 4941 } 4942 4943 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, 4944 uint32_t boot_image_offset) { 4945 boot_image_method_patches_.emplace_back( 4946 method_address, /* target_dex_file= */ nullptr, boot_image_offset); 4947 __ Bind(&boot_image_method_patches_.back().label); 4948 } 4949 4950 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { 4951 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); 4952 HX86ComputeBaseMethodAddress* method_address = 4953 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); 4954 boot_image_method_patches_.emplace_back( 4955 method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); 4956 __ Bind(&boot_image_method_patches_.back().label); 4957 } 4958 4959 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) { 4960 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); 4961 HX86ComputeBaseMethodAddress* method_address = 4962 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); 4963 // Add the patch entry and bind its label at the end of the instruction. 
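  // The label bound here marks the end of the pc-relative movl; the patch emission
  // code later subtracts kLabelPositionToLiteralOffsetAdjustment (4 bytes, see below)
  // so that the recorded literal offset points at the embedded 32-bit immediate.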
4964 method_bss_entry_patches_.emplace_back( 4965 method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); 4966 __ Bind(&method_bss_entry_patches_.back().label); 4967 } 4968 4969 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) { 4970 HX86ComputeBaseMethodAddress* method_address = 4971 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); 4972 boot_image_type_patches_.emplace_back( 4973 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_); 4974 __ Bind(&boot_image_type_patches_.back().label); 4975 } 4976 4977 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { 4978 HX86ComputeBaseMethodAddress* method_address = 4979 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); 4980 type_bss_entry_patches_.emplace_back( 4981 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_); 4982 return &type_bss_entry_patches_.back().label; 4983 } 4984 4985 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) { 4986 HX86ComputeBaseMethodAddress* method_address = 4987 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); 4988 boot_image_string_patches_.emplace_back( 4989 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_); 4990 __ Bind(&boot_image_string_patches_.back().label); 4991 } 4992 4993 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { 4994 HX86ComputeBaseMethodAddress* method_address = 4995 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); 4996 string_bss_entry_patches_.emplace_back( 4997 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_); 4998 return &string_bss_entry_patches_.back().label; 4999 } 5000 5001 void CodeGeneratorX86::LoadBootImageAddress(Register reg, 5002 uint32_t boot_image_reference, 5003 HInvokeStaticOrDirect* invoke) { 5004 if (GetCompilerOptions().IsBootImage()) { 5005 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); 5006 HX86ComputeBaseMethodAddress* method_address = 5007 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); 5008 DCHECK(method_address != nullptr); 5009 Register method_address_reg = 5010 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); 5011 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); 5012 RecordBootImageIntrinsicPatch(method_address, boot_image_reference); 5013 } else if (GetCompilerOptions().GetCompilePic()) { 5014 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); 5015 HX86ComputeBaseMethodAddress* method_address = 5016 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); 5017 DCHECK(method_address != nullptr); 5018 Register method_address_reg = 5019 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); 5020 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); 5021 RecordBootImageRelRoPatch(method_address, boot_image_reference); 5022 } else { 5023 DCHECK(Runtime::Current()->UseJitCompilation()); 5024 gc::Heap* heap = Runtime::Current()->GetHeap(); 5025 DCHECK(!heap->GetBootImageSpaces().empty()); 5026 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; 5027 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address)))); 5028 } 5029 } 5030 5031 void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, 5032 uint32_t 
boot_image_offset) { 5033 DCHECK(invoke->IsStatic()); 5034 InvokeRuntimeCallingConvention calling_convention; 5035 Register argument = calling_convention.GetRegisterAt(0); 5036 if (GetCompilerOptions().IsBootImage()) { 5037 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); 5038 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 5039 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); 5040 HX86ComputeBaseMethodAddress* method_address = 5041 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); 5042 DCHECK(method_address != nullptr); 5043 Register method_address_reg = 5044 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); 5045 __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); 5046 MethodReference target_method = invoke->GetTargetMethod(); 5047 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; 5048 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_); 5049 __ Bind(&boot_image_type_patches_.back().label); 5050 } else { 5051 LoadBootImageAddress(argument, boot_image_offset, invoke); 5052 } 5053 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); 5054 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 5055 } 5056 5057 // The label points to the end of the "movl" or another instruction but the literal offset 5058 // for method patch needs to point to the embedded constant which occupies the last 4 bytes. 5059 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; 5060 5061 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> 5062 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( 5063 const ArenaDeque<X86PcRelativePatchInfo>& infos, 5064 ArenaVector<linker::LinkerPatch>* linker_patches) { 5065 for (const X86PcRelativePatchInfo& info : infos) { 5066 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; 5067 linker_patches->push_back(Factory(literal_offset, 5068 info.target_dex_file, 5069 GetMethodAddressOffset(info.method_address), 5070 info.offset_or_index)); 5071 } 5072 } 5073 5074 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> 5075 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, 5076 const DexFile* target_dex_file, 5077 uint32_t pc_insn_offset, 5078 uint32_t boot_image_offset) { 5079 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
5080 return Factory(literal_offset, pc_insn_offset, boot_image_offset); 5081 } 5082 5083 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { 5084 DCHECK(linker_patches->empty()); 5085 size_t size = 5086 boot_image_method_patches_.size() + 5087 method_bss_entry_patches_.size() + 5088 boot_image_type_patches_.size() + 5089 type_bss_entry_patches_.size() + 5090 boot_image_string_patches_.size() + 5091 string_bss_entry_patches_.size() + 5092 boot_image_intrinsic_patches_.size(); 5093 linker_patches->reserve(size); 5094 if (GetCompilerOptions().IsBootImage()) { 5095 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( 5096 boot_image_method_patches_, linker_patches); 5097 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( 5098 boot_image_type_patches_, linker_patches); 5099 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( 5100 boot_image_string_patches_, linker_patches); 5101 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( 5102 boot_image_intrinsic_patches_, linker_patches); 5103 } else { 5104 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( 5105 boot_image_method_patches_, linker_patches); 5106 DCHECK(boot_image_type_patches_.empty()); 5107 DCHECK(boot_image_string_patches_.empty()); 5108 DCHECK(boot_image_intrinsic_patches_.empty()); 5109 } 5110 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( 5111 method_bss_entry_patches_, linker_patches); 5112 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( 5113 type_bss_entry_patches_, linker_patches); 5114 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( 5115 string_bss_entry_patches_, linker_patches); 5116 DCHECK_EQ(size, linker_patches->size()); 5117 } 5118 5119 void CodeGeneratorX86::MarkGCCard(Register temp, 5120 Register card, 5121 Register object, 5122 Register value, 5123 bool value_can_be_null) { 5124 NearLabel is_null; 5125 if (value_can_be_null) { 5126 __ testl(value, value); 5127 __ j(kEqual, &is_null); 5128 } 5129 // Load the address of the card table into `card`. 5130 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value())); 5131 // Calculate the offset (in the card table) of the card corresponding to 5132 // `object`. 5133 __ movl(temp, object); 5134 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); 5135 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the 5136 // `object`'s card. 5137 // 5138 // Register `card` contains the address of the card table. Note that the card 5139 // table's base is biased during its creation so that it always starts at an 5140 // address whose least-significant byte is equal to `kCardDirty` (see 5141 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction 5142 // below writes the `kCardDirty` (byte) value into the `object`'s card 5143 // (located at `card + object >> kCardShift`). 5144 // 5145 // This dual use of the value in register `card` (1. to calculate the location 5146 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load 5147 // (no need to explicitly load `kCardDirty` as an immediate value). 
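  // In effect the store below performs (a sketch, not the literal encoding):
  //   card_table_base[object >> kCardShift] = kCardDirty;
  // with the dirty value supplied by the low byte of `card` itself, thanks to the
  // biased card table base described above.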
5148 __ movb(Address(temp, card, TIMES_1, 0), 5149 X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); 5150 if (value_can_be_null) { 5151 __ Bind(&is_null); 5152 } 5153 } 5154 5155 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { 5156 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 5157 5158 bool object_field_get_with_read_barrier = 5159 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 5160 LocationSummary* locations = 5161 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 5162 kEmitCompilerReadBarrier 5163 ? LocationSummary::kCallOnSlowPath 5164 : LocationSummary::kNoCall); 5165 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 5166 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 5167 } 5168 locations->SetInAt(0, Location::RequiresRegister()); 5169 5170 if (DataType::IsFloatingPointType(instruction->GetType())) { 5171 locations->SetOut(Location::RequiresFpuRegister()); 5172 } else { 5173 // The output overlaps in case of long: we don't want the low move 5174 // to overwrite the object's location. Likewise, in the case of 5175 // an object field get with read barriers enabled, we do not want 5176 // the move to overwrite the object's location, as we need it to emit 5177 // the read barrier. 5178 locations->SetOut( 5179 Location::RequiresRegister(), 5180 (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) ? 5181 Location::kOutputOverlap : 5182 Location::kNoOutputOverlap); 5183 } 5184 5185 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) { 5186 // Long values can be loaded atomically into an XMM using movsd. 5187 // So we use an XMM register as a temp to achieve atomicity (first 5188 // load the temp into the XMM and then copy the XMM into the 5189 // output, 32 bits at a time). 
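  // The resulting load sequence (emitted in HandleFieldGet below) is roughly:
  //   movsd xmm_temp, [base + offset]   // single atomic 64-bit load
  //   movd  out_lo, xmm_temp
  //   psrlq xmm_temp, 32
  //   movd  out_hi, xmm_temp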
5190 locations->AddTemp(Location::RequiresFpuRegister()); 5191 } 5192 } 5193 5194 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, 5195 const FieldInfo& field_info) { 5196 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 5197 5198 LocationSummary* locations = instruction->GetLocations(); 5199 Location base_loc = locations->InAt(0); 5200 Register base = base_loc.AsRegister<Register>(); 5201 Location out = locations->Out(); 5202 bool is_volatile = field_info.IsVolatile(); 5203 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); 5204 DataType::Type load_type = instruction->GetType(); 5205 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 5206 5207 switch (load_type) { 5208 case DataType::Type::kBool: 5209 case DataType::Type::kUint8: { 5210 __ movzxb(out.AsRegister<Register>(), Address(base, offset)); 5211 break; 5212 } 5213 5214 case DataType::Type::kInt8: { 5215 __ movsxb(out.AsRegister<Register>(), Address(base, offset)); 5216 break; 5217 } 5218 5219 case DataType::Type::kUint16: { 5220 __ movzxw(out.AsRegister<Register>(), Address(base, offset)); 5221 break; 5222 } 5223 5224 case DataType::Type::kInt16: { 5225 __ movsxw(out.AsRegister<Register>(), Address(base, offset)); 5226 break; 5227 } 5228 5229 case DataType::Type::kInt32: 5230 __ movl(out.AsRegister<Register>(), Address(base, offset)); 5231 break; 5232 5233 case DataType::Type::kReference: { 5234 // /* HeapReference<Object> */ out = *(base + offset) 5235 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 5236 // Note that a potential implicit null check is handled in this 5237 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. 5238 codegen_->GenerateFieldLoadWithBakerReadBarrier( 5239 instruction, out, base, offset, /* needs_null_check= */ true); 5240 if (is_volatile) { 5241 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 5242 } 5243 } else { 5244 __ movl(out.AsRegister<Register>(), Address(base, offset)); 5245 codegen_->MaybeRecordImplicitNullCheck(instruction); 5246 if (is_volatile) { 5247 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 5248 } 5249 // If read barriers are enabled, emit read barriers other than 5250 // Baker's using a slow path (and also unpoison the loaded 5251 // reference, if heap poisoning is enabled). 
5252 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); 5253 } 5254 break; 5255 } 5256 5257 case DataType::Type::kInt64: { 5258 if (is_volatile) { 5259 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 5260 __ movsd(temp, Address(base, offset)); 5261 codegen_->MaybeRecordImplicitNullCheck(instruction); 5262 __ movd(out.AsRegisterPairLow<Register>(), temp); 5263 __ psrlq(temp, Immediate(32)); 5264 __ movd(out.AsRegisterPairHigh<Register>(), temp); 5265 } else { 5266 DCHECK_NE(base, out.AsRegisterPairLow<Register>()); 5267 __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset)); 5268 codegen_->MaybeRecordImplicitNullCheck(instruction); 5269 __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset)); 5270 } 5271 break; 5272 } 5273 5274 case DataType::Type::kFloat32: { 5275 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); 5276 break; 5277 } 5278 5279 case DataType::Type::kFloat64: { 5280 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); 5281 break; 5282 } 5283 5284 case DataType::Type::kUint32: 5285 case DataType::Type::kUint64: 5286 case DataType::Type::kVoid: 5287 LOG(FATAL) << "Unreachable type " << load_type; 5288 UNREACHABLE(); 5289 } 5290 5291 if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) { 5292 // Potential implicit null checks, in the case of reference or 5293 // long fields, are handled in the previous switch statement. 5294 } else { 5295 codegen_->MaybeRecordImplicitNullCheck(instruction); 5296 } 5297 5298 if (is_volatile) { 5299 if (load_type == DataType::Type::kReference) { 5300 // Memory barriers, in the case of references, are also handled 5301 // in the previous switch statement. 5302 } else { 5303 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 5304 } 5305 } 5306 } 5307 5308 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { 5309 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 5310 5311 LocationSummary* locations = 5312 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5313 locations->SetInAt(0, Location::RequiresRegister()); 5314 bool is_volatile = field_info.IsVolatile(); 5315 DataType::Type field_type = field_info.GetFieldType(); 5316 bool is_byte_type = DataType::Size(field_type) == 1u; 5317 5318 // The register allocator does not support multiple 5319 // inputs that die at entry with one in a specific register. 5320 if (is_byte_type) { 5321 // Ensure the value is in a byte register. 5322 locations->SetInAt(1, Location::RegisterLocation(EAX)); 5323 } else if (DataType::IsFloatingPointType(field_type)) { 5324 if (is_volatile && field_type == DataType::Type::kFloat64) { 5325 // In order to satisfy the semantics of volatile, this must be a single instruction store. 5326 locations->SetInAt(1, Location::RequiresFpuRegister()); 5327 } else { 5328 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); 5329 } 5330 } else if (is_volatile && field_type == DataType::Type::kInt64) { 5331 // In order to satisfy the semantics of volatile, this must be a single instruction store. 5332 locations->SetInAt(1, Location::RequiresRegister()); 5333 5334 // 64bits value can be atomically written to an address with movsd and an XMM register. 
5335 // We need two XMM registers because there's no easier way to (bit) copy a register pair 5336 // into a single XMM register (we copy each pair part into the XMMs and then interleave them). 5337 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the 5338 // isolated cases when we need this it isn't worth adding the extra complexity. 5339 locations->AddTemp(Location::RequiresFpuRegister()); 5340 locations->AddTemp(Location::RequiresFpuRegister()); 5341 } else { 5342 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 5343 5344 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 5345 // Temporary registers for the write barrier. 5346 locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too. 5347 // Ensure the card is in a byte register. 5348 locations->AddTemp(Location::RegisterLocation(ECX)); 5349 } 5350 } 5351 } 5352 5353 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, 5354 const FieldInfo& field_info, 5355 bool value_can_be_null) { 5356 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 5357 5358 LocationSummary* locations = instruction->GetLocations(); 5359 Register base = locations->InAt(0).AsRegister<Register>(); 5360 Location value = locations->InAt(1); 5361 bool is_volatile = field_info.IsVolatile(); 5362 DataType::Type field_type = field_info.GetFieldType(); 5363 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 5364 bool needs_write_barrier = 5365 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); 5366 5367 if (is_volatile) { 5368 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); 5369 } 5370 5371 bool maybe_record_implicit_null_check_done = false; 5372 5373 switch (field_type) { 5374 case DataType::Type::kBool: 5375 case DataType::Type::kUint8: 5376 case DataType::Type::kInt8: { 5377 __ movb(Address(base, offset), value.AsRegister<ByteRegister>()); 5378 break; 5379 } 5380 5381 case DataType::Type::kUint16: 5382 case DataType::Type::kInt16: { 5383 if (value.IsConstant()) { 5384 __ movw(Address(base, offset), 5385 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); 5386 } else { 5387 __ movw(Address(base, offset), value.AsRegister<Register>()); 5388 } 5389 break; 5390 } 5391 5392 case DataType::Type::kInt32: 5393 case DataType::Type::kReference: { 5394 if (kPoisonHeapReferences && needs_write_barrier) { 5395 // Note that in the case where `value` is a null reference, 5396 // we do not enter this block, as the reference does not 5397 // need poisoning. 
5398 DCHECK_EQ(field_type, DataType::Type::kReference); 5399 Register temp = locations->GetTemp(0).AsRegister<Register>(); 5400 __ movl(temp, value.AsRegister<Register>()); 5401 __ PoisonHeapReference(temp); 5402 __ movl(Address(base, offset), temp); 5403 } else if (value.IsConstant()) { 5404 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 5405 __ movl(Address(base, offset), Immediate(v)); 5406 } else { 5407 DCHECK(value.IsRegister()) << value; 5408 __ movl(Address(base, offset), value.AsRegister<Register>()); 5409 } 5410 break; 5411 } 5412 5413 case DataType::Type::kInt64: { 5414 if (is_volatile) { 5415 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 5416 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); 5417 __ movd(temp1, value.AsRegisterPairLow<Register>()); 5418 __ movd(temp2, value.AsRegisterPairHigh<Register>()); 5419 __ punpckldq(temp1, temp2); 5420 __ movsd(Address(base, offset), temp1); 5421 codegen_->MaybeRecordImplicitNullCheck(instruction); 5422 } else if (value.IsConstant()) { 5423 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); 5424 __ movl(Address(base, offset), Immediate(Low32Bits(v))); 5425 codegen_->MaybeRecordImplicitNullCheck(instruction); 5426 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v))); 5427 } else { 5428 __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); 5429 codegen_->MaybeRecordImplicitNullCheck(instruction); 5430 __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); 5431 } 5432 maybe_record_implicit_null_check_done = true; 5433 break; 5434 } 5435 5436 case DataType::Type::kFloat32: { 5437 if (value.IsConstant()) { 5438 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 5439 __ movl(Address(base, offset), Immediate(v)); 5440 } else { 5441 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); 5442 } 5443 break; 5444 } 5445 5446 case DataType::Type::kFloat64: { 5447 if (value.IsConstant()) { 5448 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); 5449 __ movl(Address(base, offset), Immediate(Low32Bits(v))); 5450 codegen_->MaybeRecordImplicitNullCheck(instruction); 5451 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v))); 5452 maybe_record_implicit_null_check_done = true; 5453 } else { 5454 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); 5455 } 5456 break; 5457 } 5458 5459 case DataType::Type::kUint32: 5460 case DataType::Type::kUint64: 5461 case DataType::Type::kVoid: 5462 LOG(FATAL) << "Unreachable type " << field_type; 5463 UNREACHABLE(); 5464 } 5465 5466 if (!maybe_record_implicit_null_check_done) { 5467 codegen_->MaybeRecordImplicitNullCheck(instruction); 5468 } 5469 5470 if (needs_write_barrier) { 5471 Register temp = locations->GetTemp(0).AsRegister<Register>(); 5472 Register card = locations->GetTemp(1).AsRegister<Register>(); 5473 codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null); 5474 } 5475 5476 if (is_volatile) { 5477 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); 5478 } 5479 } 5480 5481 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5482 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5483 } 5484 5485 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5486 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5487 } 5488 5489 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* 
instruction) { 5490 HandleFieldSet(instruction, instruction->GetFieldInfo()); 5491 } 5492 5493 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5494 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 5495 } 5496 5497 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 5498 HandleFieldSet(instruction, instruction->GetFieldInfo()); 5499 } 5500 5501 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 5502 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 5503 } 5504 5505 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 5506 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5507 } 5508 5509 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 5510 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5511 } 5512 5513 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet( 5514 HUnresolvedInstanceFieldGet* instruction) { 5515 FieldAccessCallingConventionX86 calling_convention; 5516 codegen_->CreateUnresolvedFieldLocationSummary( 5517 instruction, instruction->GetFieldType(), calling_convention); 5518 } 5519 5520 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet( 5521 HUnresolvedInstanceFieldGet* instruction) { 5522 FieldAccessCallingConventionX86 calling_convention; 5523 codegen_->GenerateUnresolvedFieldAccess(instruction, 5524 instruction->GetFieldType(), 5525 instruction->GetFieldIndex(), 5526 instruction->GetDexPc(), 5527 calling_convention); 5528 } 5529 5530 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet( 5531 HUnresolvedInstanceFieldSet* instruction) { 5532 FieldAccessCallingConventionX86 calling_convention; 5533 codegen_->CreateUnresolvedFieldLocationSummary( 5534 instruction, instruction->GetFieldType(), calling_convention); 5535 } 5536 5537 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet( 5538 HUnresolvedInstanceFieldSet* instruction) { 5539 FieldAccessCallingConventionX86 calling_convention; 5540 codegen_->GenerateUnresolvedFieldAccess(instruction, 5541 instruction->GetFieldType(), 5542 instruction->GetFieldIndex(), 5543 instruction->GetDexPc(), 5544 calling_convention); 5545 } 5546 5547 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet( 5548 HUnresolvedStaticFieldGet* instruction) { 5549 FieldAccessCallingConventionX86 calling_convention; 5550 codegen_->CreateUnresolvedFieldLocationSummary( 5551 instruction, instruction->GetFieldType(), calling_convention); 5552 } 5553 5554 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet( 5555 HUnresolvedStaticFieldGet* instruction) { 5556 FieldAccessCallingConventionX86 calling_convention; 5557 codegen_->GenerateUnresolvedFieldAccess(instruction, 5558 instruction->GetFieldType(), 5559 instruction->GetFieldIndex(), 5560 instruction->GetDexPc(), 5561 calling_convention); 5562 } 5563 5564 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet( 5565 HUnresolvedStaticFieldSet* instruction) { 5566 FieldAccessCallingConventionX86 calling_convention; 5567 codegen_->CreateUnresolvedFieldLocationSummary( 5568 instruction, instruction->GetFieldType(), calling_convention); 5569 } 5570 5571 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet( 5572 HUnresolvedStaticFieldSet* instruction) { 5573 FieldAccessCallingConventionX86 calling_convention; 5574 codegen_->GenerateUnresolvedFieldAccess(instruction, 5575 
instruction->GetFieldType(), 5576 instruction->GetFieldIndex(), 5577 instruction->GetDexPc(), 5578 calling_convention); 5579 } 5580 5581 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { 5582 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 5583 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() 5584 ? Location::RequiresRegister() 5585 : Location::Any(); 5586 locations->SetInAt(0, loc); 5587 } 5588 5589 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) { 5590 if (CanMoveNullCheckToUser(instruction)) { 5591 return; 5592 } 5593 LocationSummary* locations = instruction->GetLocations(); 5594 Location obj = locations->InAt(0); 5595 5596 __ testl(EAX, Address(obj.AsRegister<Register>(), 0)); 5597 RecordPcInfo(instruction, instruction->GetDexPc()); 5598 } 5599 5600 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) { 5601 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction); 5602 AddSlowPath(slow_path); 5603 5604 LocationSummary* locations = instruction->GetLocations(); 5605 Location obj = locations->InAt(0); 5606 5607 if (obj.IsRegister()) { 5608 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>()); 5609 } else if (obj.IsStackSlot()) { 5610 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0)); 5611 } else { 5612 DCHECK(obj.IsConstant()) << obj; 5613 DCHECK(obj.GetConstant()->IsNullConstant()); 5614 __ jmp(slow_path->GetEntryLabel()); 5615 return; 5616 } 5617 __ j(kEqual, slow_path->GetEntryLabel()); 5618 } 5619 5620 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { 5621 codegen_->GenerateNullCheck(instruction); 5622 } 5623 5624 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { 5625 bool object_array_get_with_read_barrier = 5626 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 5627 LocationSummary* locations = 5628 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 5629 object_array_get_with_read_barrier 5630 ? LocationSummary::kCallOnSlowPath 5631 : LocationSummary::kNoCall); 5632 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 5633 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 5634 } 5635 locations->SetInAt(0, Location::RequiresRegister()); 5636 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 5637 if (DataType::IsFloatingPointType(instruction->GetType())) { 5638 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5639 } else { 5640 // The output overlaps in case of long: we don't want the low move 5641 // to overwrite the array's location. Likewise, in the case of an 5642 // object array get with read barriers enabled, we do not want the 5643 // move to overwrite the array's location, as we need it to emit 5644 // the read barrier. 5645 locations->SetOut( 5646 Location::RequiresRegister(), 5647 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier) 5648 ? 
Location::kOutputOverlap 5649 : Location::kNoOutputOverlap); 5650 } 5651 } 5652 5653 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { 5654 LocationSummary* locations = instruction->GetLocations(); 5655 Location obj_loc = locations->InAt(0); 5656 Register obj = obj_loc.AsRegister<Register>(); 5657 Location index = locations->InAt(1); 5658 Location out_loc = locations->Out(); 5659 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); 5660 5661 DataType::Type type = instruction->GetType(); 5662 switch (type) { 5663 case DataType::Type::kBool: 5664 case DataType::Type::kUint8: { 5665 Register out = out_loc.AsRegister<Register>(); 5666 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); 5667 break; 5668 } 5669 5670 case DataType::Type::kInt8: { 5671 Register out = out_loc.AsRegister<Register>(); 5672 __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); 5673 break; 5674 } 5675 5676 case DataType::Type::kUint16: { 5677 Register out = out_loc.AsRegister<Register>(); 5678 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 5679 // Branch cases into compressed and uncompressed for each index's type. 5680 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 5681 NearLabel done, not_compressed; 5682 __ testb(Address(obj, count_offset), Immediate(1)); 5683 codegen_->MaybeRecordImplicitNullCheck(instruction); 5684 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 5685 "Expecting 0=compressed, 1=uncompressed"); 5686 __ j(kNotZero, ¬_compressed); 5687 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); 5688 __ jmp(&done); 5689 __ Bind(¬_compressed); 5690 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); 5691 __ Bind(&done); 5692 } else { 5693 // Common case for charAt of array of char or when string compression's 5694 // feature is turned off. 5695 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); 5696 } 5697 break; 5698 } 5699 5700 case DataType::Type::kInt16: { 5701 Register out = out_loc.AsRegister<Register>(); 5702 __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); 5703 break; 5704 } 5705 5706 case DataType::Type::kInt32: { 5707 Register out = out_loc.AsRegister<Register>(); 5708 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); 5709 break; 5710 } 5711 5712 case DataType::Type::kReference: { 5713 static_assert( 5714 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 5715 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 5716 // /* HeapReference<Object> */ out = 5717 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 5718 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 5719 // Note that a potential implicit null check is handled in this 5720 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. 
5721 codegen_->GenerateArrayLoadWithBakerReadBarrier( 5722 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true); 5723 } else { 5724 Register out = out_loc.AsRegister<Register>(); 5725 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); 5726 codegen_->MaybeRecordImplicitNullCheck(instruction); 5727 // If read barriers are enabled, emit read barriers other than 5728 // Baker's using a slow path (and also unpoison the loaded 5729 // reference, if heap poisoning is enabled). 5730 if (index.IsConstant()) { 5731 uint32_t offset = 5732 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; 5733 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); 5734 } else { 5735 codegen_->MaybeGenerateReadBarrierSlow( 5736 instruction, out_loc, out_loc, obj_loc, data_offset, index); 5737 } 5738 } 5739 break; 5740 } 5741 5742 case DataType::Type::kInt64: { 5743 DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>()); 5744 __ movl(out_loc.AsRegisterPairLow<Register>(), 5745 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset)); 5746 codegen_->MaybeRecordImplicitNullCheck(instruction); 5747 __ movl(out_loc.AsRegisterPairHigh<Register>(), 5748 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize)); 5749 break; 5750 } 5751 5752 case DataType::Type::kFloat32: { 5753 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 5754 __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); 5755 break; 5756 } 5757 5758 case DataType::Type::kFloat64: { 5759 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 5760 __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset)); 5761 break; 5762 } 5763 5764 case DataType::Type::kUint32: 5765 case DataType::Type::kUint64: 5766 case DataType::Type::kVoid: 5767 LOG(FATAL) << "Unreachable type " << type; 5768 UNREACHABLE(); 5769 } 5770 5771 if (type == DataType::Type::kReference || type == DataType::Type::kInt64) { 5772 // Potential implicit null checks, in the case of reference or 5773 // long arrays, are handled in the previous switch statement. 5774 } else { 5775 codegen_->MaybeRecordImplicitNullCheck(instruction); 5776 } 5777 } 5778 5779 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { 5780 DataType::Type value_type = instruction->GetComponentType(); 5781 5782 bool needs_write_barrier = 5783 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 5784 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 5785 5786 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5787 instruction, 5788 may_need_runtime_call_for_type_check ? 5789 LocationSummary::kCallOnSlowPath : 5790 LocationSummary::kNoCall); 5791 5792 bool is_byte_type = DataType::Size(value_type) == 1u; 5793 // We need the inputs to be different than the output in case of long operation. 5794 // In case of a byte operation, the register allocator does not support multiple 5795 // inputs that die at entry with one in a specific register. 5796 locations->SetInAt(0, Location::RequiresRegister()); 5797 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 5798 if (is_byte_type) { 5799 // Ensure the value is in a byte register. 
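    // (Only EAX/ECX/EDX/EBX have byte-addressable forms on x86-32, so the value
    //  is pinned to one of them; EAX is the one requested here.)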
5800 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); 5801 } else if (DataType::IsFloatingPointType(value_type)) { 5802 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); 5803 } else { 5804 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); 5805 } 5806 if (needs_write_barrier) { 5807 // Temporary registers for the write barrier. 5808 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 5809 // Ensure the card is in a byte register. 5810 locations->AddTemp(Location::RegisterLocation(ECX)); 5811 } 5812 } 5813 5814 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { 5815 LocationSummary* locations = instruction->GetLocations(); 5816 Location array_loc = locations->InAt(0); 5817 Register array = array_loc.AsRegister<Register>(); 5818 Location index = locations->InAt(1); 5819 Location value = locations->InAt(2); 5820 DataType::Type value_type = instruction->GetComponentType(); 5821 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 5822 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 5823 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 5824 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 5825 bool needs_write_barrier = 5826 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 5827 5828 switch (value_type) { 5829 case DataType::Type::kBool: 5830 case DataType::Type::kUint8: 5831 case DataType::Type::kInt8: { 5832 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); 5833 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset); 5834 if (value.IsRegister()) { 5835 __ movb(address, value.AsRegister<ByteRegister>()); 5836 } else { 5837 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); 5838 } 5839 codegen_->MaybeRecordImplicitNullCheck(instruction); 5840 break; 5841 } 5842 5843 case DataType::Type::kUint16: 5844 case DataType::Type::kInt16: { 5845 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); 5846 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset); 5847 if (value.IsRegister()) { 5848 __ movw(address, value.AsRegister<Register>()); 5849 } else { 5850 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); 5851 } 5852 codegen_->MaybeRecordImplicitNullCheck(instruction); 5853 break; 5854 } 5855 5856 case DataType::Type::kReference: { 5857 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); 5858 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); 5859 5860 if (!value.IsRegister()) { 5861 // Just setting null. 5862 DCHECK(instruction->InputAt(2)->IsNullConstant()); 5863 DCHECK(value.IsConstant()) << value; 5864 __ movl(address, Immediate(0)); 5865 codegen_->MaybeRecordImplicitNullCheck(instruction); 5866 DCHECK(!needs_write_barrier); 5867 DCHECK(!may_need_runtime_call_for_type_check); 5868 break; 5869 } 5870 5871 DCHECK(needs_write_barrier); 5872 Register register_value = value.AsRegister<Register>(); 5873 // We cannot use a NearLabel for `done`, as its range may be too 5874 // short when Baker read barriers are enabled. 
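      // Sketch of the type check emitted below when it cannot be statically elided:
      //   if (array->klass_->component_type_ == value->klass_)          -> store
      //   else if (array is statically Object[] &&
      //            component_type_->super_class_ == null)               -> store
      //   else                                                          -> ArraySet slow path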
5875 Label done; 5876 NearLabel not_null, do_put; 5877 SlowPathCode* slow_path = nullptr; 5878 Location temp_loc = locations->GetTemp(0); 5879 Register temp = temp_loc.AsRegister<Register>(); 5880 if (may_need_runtime_call_for_type_check) { 5881 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction); 5882 codegen_->AddSlowPath(slow_path); 5883 if (instruction->GetValueCanBeNull()) { 5884 __ testl(register_value, register_value); 5885 __ j(kNotEqual, ¬_null); 5886 __ movl(address, Immediate(0)); 5887 codegen_->MaybeRecordImplicitNullCheck(instruction); 5888 __ jmp(&done); 5889 __ Bind(¬_null); 5890 } 5891 5892 // Note that when Baker read barriers are enabled, the type 5893 // checks are performed without read barriers. This is fine, 5894 // even in the case where a class object is in the from-space 5895 // after the flip, as a comparison involving such a type would 5896 // not produce a false positive; it may of course produce a 5897 // false negative, in which case we would take the ArraySet 5898 // slow path. 5899 5900 // /* HeapReference<Class> */ temp = array->klass_ 5901 __ movl(temp, Address(array, class_offset)); 5902 codegen_->MaybeRecordImplicitNullCheck(instruction); 5903 __ MaybeUnpoisonHeapReference(temp); 5904 5905 // /* HeapReference<Class> */ temp = temp->component_type_ 5906 __ movl(temp, Address(temp, component_offset)); 5907 // If heap poisoning is enabled, no need to unpoison `temp` 5908 // nor the object reference in `register_value->klass`, as 5909 // we are comparing two poisoned references. 5910 __ cmpl(temp, Address(register_value, class_offset)); 5911 5912 if (instruction->StaticTypeOfArrayIsObjectArray()) { 5913 __ j(kEqual, &do_put); 5914 // If heap poisoning is enabled, the `temp` reference has 5915 // not been unpoisoned yet; unpoison it now. 5916 __ MaybeUnpoisonHeapReference(temp); 5917 5918 // If heap poisoning is enabled, no need to unpoison the 5919 // heap reference loaded below, as it is only used for a 5920 // comparison with null. 
5921 __ cmpl(Address(temp, super_offset), Immediate(0)); 5922 __ j(kNotEqual, slow_path->GetEntryLabel()); 5923 __ Bind(&do_put); 5924 } else { 5925 __ j(kNotEqual, slow_path->GetEntryLabel()); 5926 } 5927 } 5928 5929 if (kPoisonHeapReferences) { 5930 __ movl(temp, register_value); 5931 __ PoisonHeapReference(temp); 5932 __ movl(address, temp); 5933 } else { 5934 __ movl(address, register_value); 5935 } 5936 if (!may_need_runtime_call_for_type_check) { 5937 codegen_->MaybeRecordImplicitNullCheck(instruction); 5938 } 5939 5940 Register card = locations->GetTemp(1).AsRegister<Register>(); 5941 codegen_->MarkGCCard( 5942 temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull()); 5943 __ Bind(&done); 5944 5945 if (slow_path != nullptr) { 5946 __ Bind(slow_path->GetExitLabel()); 5947 } 5948 5949 break; 5950 } 5951 5952 case DataType::Type::kInt32: { 5953 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); 5954 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); 5955 if (value.IsRegister()) { 5956 __ movl(address, value.AsRegister<Register>()); 5957 } else { 5958 DCHECK(value.IsConstant()) << value; 5959 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 5960 __ movl(address, Immediate(v)); 5961 } 5962 codegen_->MaybeRecordImplicitNullCheck(instruction); 5963 break; 5964 } 5965 5966 case DataType::Type::kInt64: { 5967 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); 5968 if (value.IsRegisterPair()) { 5969 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset), 5970 value.AsRegisterPairLow<Register>()); 5971 codegen_->MaybeRecordImplicitNullCheck(instruction); 5972 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize), 5973 value.AsRegisterPairHigh<Register>()); 5974 } else { 5975 DCHECK(value.IsConstant()); 5976 int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); 5977 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset), 5978 Immediate(Low32Bits(val))); 5979 codegen_->MaybeRecordImplicitNullCheck(instruction); 5980 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize), 5981 Immediate(High32Bits(val))); 5982 } 5983 break; 5984 } 5985 5986 case DataType::Type::kFloat32: { 5987 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); 5988 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); 5989 if (value.IsFpuRegister()) { 5990 __ movss(address, value.AsFpuRegister<XmmRegister>()); 5991 } else { 5992 DCHECK(value.IsConstant()); 5993 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); 5994 __ movl(address, Immediate(v)); 5995 } 5996 codegen_->MaybeRecordImplicitNullCheck(instruction); 5997 break; 5998 } 5999 6000 case DataType::Type::kFloat64: { 6001 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); 6002 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset); 6003 if (value.IsFpuRegister()) { 6004 __ movsd(address, value.AsFpuRegister<XmmRegister>()); 6005 } else { 6006 DCHECK(value.IsConstant()); 6007 Address address_hi = 6008 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize); 6009 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); 6010 __ movl(address, Immediate(Low32Bits(v))); 6011 codegen_->MaybeRecordImplicitNullCheck(instruction); 6012 __ 
movl(address_hi, Immediate(High32Bits(v))); 6013 } 6014 break; 6015 } 6016 6017 case DataType::Type::kUint32: 6018 case DataType::Type::kUint64: 6019 case DataType::Type::kVoid: 6020 LOG(FATAL) << "Unreachable type " << instruction->GetType(); 6021 UNREACHABLE(); 6022 } 6023 } 6024 6025 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) { 6026 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 6027 locations->SetInAt(0, Location::RequiresRegister()); 6028 if (!instruction->IsEmittedAtUseSite()) { 6029 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 6030 } 6031 } 6032 6033 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) { 6034 if (instruction->IsEmittedAtUseSite()) { 6035 return; 6036 } 6037 6038 LocationSummary* locations = instruction->GetLocations(); 6039 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); 6040 Register obj = locations->InAt(0).AsRegister<Register>(); 6041 Register out = locations->Out().AsRegister<Register>(); 6042 __ movl(out, Address(obj, offset)); 6043 codegen_->MaybeRecordImplicitNullCheck(instruction); 6044 // Mask out most significant bit in case the array is String's array of char. 6045 if (mirror::kUseStringCompression && instruction->IsStringLength()) { 6046 __ shrl(out, Immediate(1)); 6047 } 6048 } 6049 6050 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { 6051 RegisterSet caller_saves = RegisterSet::Empty(); 6052 InvokeRuntimeCallingConvention calling_convention; 6053 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 6054 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 6055 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); 6056 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); 6057 HInstruction* length = instruction->InputAt(1); 6058 if (!length->IsEmittedAtUseSite()) { 6059 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 6060 } 6061 // Need register to see array's length. 6062 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 6063 locations->AddTemp(Location::RequiresRegister()); 6064 } 6065 } 6066 6067 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { 6068 const bool is_string_compressed_char_at = 6069 mirror::kUseStringCompression && instruction->IsStringCharAt(); 6070 LocationSummary* locations = instruction->GetLocations(); 6071 Location index_loc = locations->InAt(0); 6072 Location length_loc = locations->InAt(1); 6073 SlowPathCode* slow_path = 6074 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction); 6075 6076 if (length_loc.IsConstant()) { 6077 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); 6078 if (index_loc.IsConstant()) { 6079 // BCE will remove the bounds check if we are guaranteed to pass. 6080 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); 6081 if (index < 0 || index >= length) { 6082 codegen_->AddSlowPath(slow_path); 6083 __ jmp(slow_path->GetEntryLabel()); 6084 } else { 6085 // Some optimization after BCE may have generated this, and we should not 6086 // generate a bounds check if it is a valid range. 6087 } 6088 return; 6089 } 6090 6091 // We have to reverse the jump condition because the length is the constant. 
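// For instance, with a constant length of 16 the code below is, in effect:
//   cmp index_reg, 16 ; jae slow_path
// The unsigned kAboveEqual test also rejects negative indices, since they
// compare as large unsigned values.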
6092 Register index_reg = index_loc.AsRegister<Register>(); 6093 __ cmpl(index_reg, Immediate(length)); 6094 codegen_->AddSlowPath(slow_path); 6095 __ j(kAboveEqual, slow_path->GetEntryLabel()); 6096 } else { 6097 HInstruction* array_length = instruction->InputAt(1); 6098 if (array_length->IsEmittedAtUseSite()) { 6099 // Address the length field in the array. 6100 DCHECK(array_length->IsArrayLength()); 6101 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); 6102 Location array_loc = array_length->GetLocations()->InAt(0); 6103 Address array_len(array_loc.AsRegister<Register>(), len_offset); 6104 if (is_string_compressed_char_at) { 6105 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for 6106 // the string compression flag) with the in-memory length and avoid the temporary. 6107 Register length_reg = locations->GetTemp(0).AsRegister<Register>(); 6108 __ movl(length_reg, array_len); 6109 codegen_->MaybeRecordImplicitNullCheck(array_length); 6110 __ shrl(length_reg, Immediate(1)); 6111 codegen_->GenerateIntCompare(length_reg, index_loc); 6112 } else { 6113 // Checking bounds for general case: 6114 // Array of char or string's array with feature compression off. 6115 if (index_loc.IsConstant()) { 6116 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); 6117 __ cmpl(array_len, Immediate(value)); 6118 } else { 6119 __ cmpl(array_len, index_loc.AsRegister<Register>()); 6120 } 6121 codegen_->MaybeRecordImplicitNullCheck(array_length); 6122 } 6123 } else { 6124 codegen_->GenerateIntCompare(length_loc, index_loc); 6125 } 6126 codegen_->AddSlowPath(slow_path); 6127 __ j(kBelowEqual, slow_path->GetEntryLabel()); 6128 } 6129 } 6130 6131 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 6132 LOG(FATAL) << "Unreachable"; 6133 } 6134 6135 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) { 6136 if (instruction->GetNext()->IsSuspendCheck() && 6137 instruction->GetBlock()->GetLoopInformation() != nullptr) { 6138 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); 6139 // The back edge will generate the suspend check. 6140 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); 6141 } 6142 6143 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 6144 } 6145 6146 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { 6147 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6148 instruction, LocationSummary::kCallOnSlowPath); 6149 // In suspend check slow path, usually there are no caller-save registers at all. 6150 // If SIMD instructions are present, however, we force spilling all live SIMD 6151 // registers in full width (since the runtime only saves/restores lower part). 6152 locations->SetCustomSlowPathCallerSaves( 6153 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); 6154 } 6155 6156 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) { 6157 HBasicBlock* block = instruction->GetBlock(); 6158 if (block->GetLoopInformation() != nullptr) { 6159 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 6160 // The back edge will generate the suspend check. 6161 return; 6162 } 6163 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 6164 // The goto will generate the suspend check. 
6165 return; 6166 } 6167 GenerateSuspendCheck(instruction, nullptr); 6168 } 6169 6170 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction, 6171 HBasicBlock* successor) { 6172 SuspendCheckSlowPathX86* slow_path = 6173 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath()); 6174 if (slow_path == nullptr) { 6175 slow_path = 6176 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor); 6177 instruction->SetSlowPath(slow_path); 6178 codegen_->AddSlowPath(slow_path); 6179 if (successor != nullptr) { 6180 DCHECK(successor->IsLoopHeader()); 6181 } 6182 } else { 6183 DCHECK_EQ(slow_path->GetSuccessor(), successor); 6184 } 6185 6186 __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()), 6187 Immediate(0)); 6188 if (successor == nullptr) { 6189 __ j(kNotEqual, slow_path->GetEntryLabel()); 6190 __ Bind(slow_path->GetReturnLabel()); 6191 } else { 6192 __ j(kEqual, codegen_->GetLabelOf(successor)); 6193 __ jmp(slow_path->GetEntryLabel()); 6194 } 6195 } 6196 6197 X86Assembler* ParallelMoveResolverX86::GetAssembler() const { 6198 return codegen_->GetAssembler(); 6199 } 6200 6201 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) { 6202 ScratchRegisterScope ensure_scratch( 6203 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); 6204 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); 6205 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; 6206 6207 // Now that temp register is available (possibly spilled), move blocks of memory. 6208 for (int i = 0; i < number_of_words; i++) { 6209 __ movl(temp_reg, Address(ESP, src + stack_offset)); 6210 __ movl(Address(ESP, dst + stack_offset), temp_reg); 6211 stack_offset += kX86WordSize; 6212 } 6213 } 6214 6215 void ParallelMoveResolverX86::EmitMove(size_t index) { 6216 MoveOperands* move = moves_[index]; 6217 Location source = move->GetSource(); 6218 Location destination = move->GetDestination(); 6219 6220 if (source.IsRegister()) { 6221 if (destination.IsRegister()) { 6222 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>()); 6223 } else if (destination.IsFpuRegister()) { 6224 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>()); 6225 } else { 6226 DCHECK(destination.IsStackSlot()); 6227 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>()); 6228 } 6229 } else if (source.IsRegisterPair()) { 6230 size_t elem_size = DataType::Size(DataType::Type::kInt32); 6231 // Create stack space for 2 elements. 6232 __ subl(ESP, Immediate(2 * elem_size)); 6233 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>()); 6234 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>()); 6235 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); 6236 // And remove the temporary stack space we allocated. 
6237 __ addl(ESP, Immediate(2 * elem_size)); 6238 } else if (source.IsFpuRegister()) { 6239 if (destination.IsRegister()) { 6240 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>()); 6241 } else if (destination.IsFpuRegister()) { 6242 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); 6243 } else if (destination.IsRegisterPair()) { 6244 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>(); 6245 __ movd(destination.AsRegisterPairLow<Register>(), src_reg); 6246 __ psrlq(src_reg, Immediate(32)); 6247 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg); 6248 } else if (destination.IsStackSlot()) { 6249 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); 6250 } else if (destination.IsDoubleStackSlot()) { 6251 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); 6252 } else { 6253 DCHECK(destination.IsSIMDStackSlot()); 6254 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); 6255 } 6256 } else if (source.IsStackSlot()) { 6257 if (destination.IsRegister()) { 6258 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex())); 6259 } else if (destination.IsFpuRegister()) { 6260 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); 6261 } else { 6262 DCHECK(destination.IsStackSlot()); 6263 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1); 6264 } 6265 } else if (source.IsDoubleStackSlot()) { 6266 if (destination.IsRegisterPair()) { 6267 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex())); 6268 __ movl(destination.AsRegisterPairHigh<Register>(), 6269 Address(ESP, source.GetHighStackIndex(kX86WordSize))); 6270 } else if (destination.IsFpuRegister()) { 6271 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); 6272 } else { 6273 DCHECK(destination.IsDoubleStackSlot()) << destination; 6274 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2); 6275 } 6276 } else if (source.IsSIMDStackSlot()) { 6277 if (destination.IsFpuRegister()) { 6278 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); 6279 } else { 6280 DCHECK(destination.IsSIMDStackSlot()); 6281 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4); 6282 } 6283 } else if (source.IsConstant()) { 6284 HConstant* constant = source.GetConstant(); 6285 if (constant->IsIntConstant() || constant->IsNullConstant()) { 6286 int32_t value = CodeGenerator::GetInt32ValueOf(constant); 6287 if (destination.IsRegister()) { 6288 if (value == 0) { 6289 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>()); 6290 } else { 6291 __ movl(destination.AsRegister<Register>(), Immediate(value)); 6292 } 6293 } else { 6294 DCHECK(destination.IsStackSlot()) << destination; 6295 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); 6296 } 6297 } else if (constant->IsFloatConstant()) { 6298 float fp_value = constant->AsFloatConstant()->GetValue(); 6299 int32_t value = bit_cast<int32_t, float>(fp_value); 6300 Immediate imm(value); 6301 if (destination.IsFpuRegister()) { 6302 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 6303 if (value == 0) { 6304 // Easy handling of 0.0. 
6305 __ xorps(dest, dest); 6306 } else { 6307 ScratchRegisterScope ensure_scratch( 6308 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); 6309 Register temp = static_cast<Register>(ensure_scratch.GetRegister()); 6310 __ movl(temp, Immediate(value)); 6311 __ movd(dest, temp); 6312 } 6313 } else { 6314 DCHECK(destination.IsStackSlot()) << destination; 6315 __ movl(Address(ESP, destination.GetStackIndex()), imm); 6316 } 6317 } else if (constant->IsLongConstant()) { 6318 int64_t value = constant->AsLongConstant()->GetValue(); 6319 int32_t low_value = Low32Bits(value); 6320 int32_t high_value = High32Bits(value); 6321 Immediate low(low_value); 6322 Immediate high(high_value); 6323 if (destination.IsDoubleStackSlot()) { 6324 __ movl(Address(ESP, destination.GetStackIndex()), low); 6325 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high); 6326 } else { 6327 __ movl(destination.AsRegisterPairLow<Register>(), low); 6328 __ movl(destination.AsRegisterPairHigh<Register>(), high); 6329 } 6330 } else { 6331 DCHECK(constant->IsDoubleConstant()); 6332 double dbl_value = constant->AsDoubleConstant()->GetValue(); 6333 int64_t value = bit_cast<int64_t, double>(dbl_value); 6334 int32_t low_value = Low32Bits(value); 6335 int32_t high_value = High32Bits(value); 6336 Immediate low(low_value); 6337 Immediate high(high_value); 6338 if (destination.IsFpuRegister()) { 6339 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 6340 if (value == 0) { 6341 // Easy handling of 0.0. 6342 __ xorpd(dest, dest); 6343 } else { 6344 __ pushl(high); 6345 __ pushl(low); 6346 __ movsd(dest, Address(ESP, 0)); 6347 __ addl(ESP, Immediate(8)); 6348 } 6349 } else { 6350 DCHECK(destination.IsDoubleStackSlot()) << destination; 6351 __ movl(Address(ESP, destination.GetStackIndex()), low); 6352 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high); 6353 } 6354 } 6355 } else { 6356 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source; 6357 } 6358 } 6359 6360 void ParallelMoveResolverX86::Exchange(Register reg, int mem) { 6361 Register suggested_scratch = reg == EAX ? EBX : EAX; 6362 ScratchRegisterScope ensure_scratch( 6363 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters()); 6364 6365 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; 6366 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); 6367 __ movl(Address(ESP, mem + stack_offset), reg); 6368 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); 6369 } 6370 6371 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) { 6372 ScratchRegisterScope ensure_scratch( 6373 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); 6374 6375 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); 6376 int stack_offset = ensure_scratch.IsSpilled() ? 
kX86WordSize : 0; 6377 __ movl(temp_reg, Address(ESP, mem + stack_offset)); 6378 __ movss(Address(ESP, mem + stack_offset), reg); 6379 __ movd(reg, temp_reg); 6380 } 6381 6382 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) { 6383 size_t extra_slot = 4 * kX86WordSize; 6384 __ subl(ESP, Immediate(extra_slot)); 6385 __ movups(Address(ESP, 0), XmmRegister(reg)); 6386 ExchangeMemory(0, mem + extra_slot, 4); 6387 __ movups(XmmRegister(reg), Address(ESP, 0)); 6388 __ addl(ESP, Immediate(extra_slot)); 6389 } 6390 6391 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) { 6392 ScratchRegisterScope ensure_scratch1( 6393 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); 6394 6395 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX; 6396 ScratchRegisterScope ensure_scratch2( 6397 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters()); 6398 6399 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; 6400 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0; 6401 6402 // Now that temp registers are available (possibly spilled), exchange blocks of memory. 6403 for (int i = 0; i < number_of_words; i++) { 6404 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); 6405 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); 6406 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); 6407 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); 6408 stack_offset += kX86WordSize; 6409 } 6410 } 6411 6412 void ParallelMoveResolverX86::EmitSwap(size_t index) { 6413 MoveOperands* move = moves_[index]; 6414 Location source = move->GetSource(); 6415 Location destination = move->GetDestination(); 6416 6417 if (source.IsRegister() && destination.IsRegister()) { 6418 // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary. 6419 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>()); 6420 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>()); 6421 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>()); 6422 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>()); 6423 } else if (source.IsRegister() && destination.IsStackSlot()) { 6424 Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); 6425 } else if (source.IsStackSlot() && destination.IsRegister()) { 6426 Exchange(destination.AsRegister<Register>(), source.GetStackIndex()); 6427 } else if (source.IsStackSlot() && destination.IsStackSlot()) { 6428 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1); 6429 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { 6430 // Use XOR Swap algorithm to avoid a temporary. 
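// Schematically: (a, b) -> (a^b, b) -> (a^b, a) -> (b, a). Since xorpd
// operates on the whole XMM register, the full register contents are swapped,
// which covers both single- and double-precision values.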
6431 DCHECK_NE(source.reg(), destination.reg()); 6432 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); 6433 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>()); 6434 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); 6435 } else if (source.IsFpuRegister() && destination.IsStackSlot()) { 6436 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 6437 } else if (destination.IsFpuRegister() && source.IsStackSlot()) { 6438 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 6439 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { 6440 // Take advantage of the 16 bytes in the XMM register. 6441 XmmRegister reg = source.AsFpuRegister<XmmRegister>(); 6442 Address stack(ESP, destination.GetStackIndex()); 6443 // Load the double into the high doubleword. 6444 __ movhpd(reg, stack); 6445 6446 // Store the low double into the destination. 6447 __ movsd(stack, reg); 6448 6449 // Move the high double to the low double. 6450 __ psrldq(reg, Immediate(8)); 6451 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) { 6452 // Take advantage of the 16 bytes in the XMM register. 6453 XmmRegister reg = destination.AsFpuRegister<XmmRegister>(); 6454 Address stack(ESP, source.GetStackIndex()); 6455 // Load the double into the high doubleword. 6456 __ movhpd(reg, stack); 6457 6458 // Store the low double into the destination. 6459 __ movsd(stack, reg); 6460 6461 // Move the high double to the low double. 6462 __ psrldq(reg, Immediate(8)); 6463 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) { 6464 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2); 6465 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) { 6466 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4); 6467 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) { 6468 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 6469 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) { 6470 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 6471 } else { 6472 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination; 6473 } 6474 } 6475 6476 void ParallelMoveResolverX86::SpillScratch(int reg) { 6477 __ pushl(static_cast<Register>(reg)); 6478 } 6479 6480 void ParallelMoveResolverX86::RestoreScratch(int reg) { 6481 __ popl(static_cast<Register>(reg)); 6482 } 6483 6484 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( 6485 HLoadClass::LoadKind desired_class_load_kind) { 6486 switch (desired_class_load_kind) { 6487 case HLoadClass::LoadKind::kInvalid: 6488 LOG(FATAL) << "UNREACHABLE"; 6489 UNREACHABLE(); 6490 case HLoadClass::LoadKind::kReferrersClass: 6491 break; 6492 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 6493 case HLoadClass::LoadKind::kBootImageRelRo: 6494 case HLoadClass::LoadKind::kBssEntry: 6495 DCHECK(!Runtime::Current()->UseJitCompilation()); 6496 break; 6497 case HLoadClass::LoadKind::kJitBootImageAddress: 6498 case HLoadClass::LoadKind::kJitTableAddress: 6499 DCHECK(Runtime::Current()->UseJitCompilation()); 6500 break; 6501 case HLoadClass::LoadKind::kRuntimeCall: 6502 break; 6503 } 6504 return desired_class_load_kind; 6505 } 6506 6507 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { 6508 HLoadClass::LoadKind load_kind = 
cls->GetLoadKind(); 6509 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 6510 InvokeRuntimeCallingConvention calling_convention; 6511 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 6512 cls, 6513 Location::RegisterLocation(calling_convention.GetRegisterAt(0)), 6514 Location::RegisterLocation(EAX)); 6515 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX); 6516 return; 6517 } 6518 DCHECK(!cls->NeedsAccessCheck()); 6519 6520 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 6521 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 6522 ? LocationSummary::kCallOnSlowPath 6523 : LocationSummary::kNoCall; 6524 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); 6525 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 6526 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 6527 } 6528 6529 if (load_kind == HLoadClass::LoadKind::kReferrersClass || 6530 load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || 6531 load_kind == HLoadClass::LoadKind::kBootImageRelRo || 6532 load_kind == HLoadClass::LoadKind::kBssEntry) { 6533 locations->SetInAt(0, Location::RequiresRegister()); 6534 } 6535 locations->SetOut(Location::RequiresRegister()); 6536 if (load_kind == HLoadClass::LoadKind::kBssEntry) { 6537 if (!kUseReadBarrier || kUseBakerReadBarrier) { 6538 // Rely on the type resolution and/or initialization to save everything. 6539 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 6540 } else { 6541 // For non-Baker read barrier we have a temp-clobbering call. 6542 } 6543 } 6544 } 6545 6546 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file, 6547 dex::TypeIndex type_index, 6548 Handle<mirror::Class> handle) { 6549 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); 6550 // Add a patch entry and return the label. 6551 jit_class_patches_.emplace_back(&dex_file, type_index.index_); 6552 PatchInfo<Label>* info = &jit_class_patches_.back(); 6553 return &info->label; 6554 } 6555 6556 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 6557 // move. 6558 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { 6559 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 6560 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 6561 codegen_->GenerateLoadClassRuntimeCall(cls); 6562 return; 6563 } 6564 DCHECK(!cls->NeedsAccessCheck()); 6565 6566 LocationSummary* locations = cls->GetLocations(); 6567 Location out_loc = locations->Out(); 6568 Register out = out_loc.AsRegister<Register>(); 6569 6570 bool generate_null_check = false; 6571 const ReadBarrierOption read_barrier_option = cls->IsInBootImage() 6572 ? 
kWithoutReadBarrier 6573 : kCompilerReadBarrierOption; 6574 switch (load_kind) { 6575 case HLoadClass::LoadKind::kReferrersClass: { 6576 DCHECK(!cls->CanCallRuntime()); 6577 DCHECK(!cls->MustGenerateClinitCheck()); 6578 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 6579 Register current_method = locations->InAt(0).AsRegister<Register>(); 6580 GenerateGcRootFieldLoad( 6581 cls, 6582 out_loc, 6583 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), 6584 /* fixup_label= */ nullptr, 6585 read_barrier_option); 6586 break; 6587 } 6588 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { 6589 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 6590 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 6591 Register method_address = locations->InAt(0).AsRegister<Register>(); 6592 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); 6593 codegen_->RecordBootImageTypePatch(cls); 6594 break; 6595 } 6596 case HLoadClass::LoadKind::kBootImageRelRo: { 6597 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 6598 Register method_address = locations->InAt(0).AsRegister<Register>(); 6599 __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); 6600 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(), 6601 codegen_->GetBootImageOffset(cls)); 6602 break; 6603 } 6604 case HLoadClass::LoadKind::kBssEntry: { 6605 Register method_address = locations->InAt(0).AsRegister<Register>(); 6606 Address address(method_address, CodeGeneratorX86::kDummy32BitOffset); 6607 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls); 6608 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); 6609 generate_null_check = true; 6610 break; 6611 } 6612 case HLoadClass::LoadKind::kJitBootImageAddress: { 6613 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 6614 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); 6615 DCHECK_NE(address, 0u); 6616 __ movl(out, Immediate(address)); 6617 break; 6618 } 6619 case HLoadClass::LoadKind::kJitTableAddress: { 6620 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset); 6621 Label* fixup_label = codegen_->NewJitRootClassPatch( 6622 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass()); 6623 // /* GcRoot<mirror::Class> */ out = *address 6624 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); 6625 break; 6626 } 6627 case HLoadClass::LoadKind::kRuntimeCall: 6628 case HLoadClass::LoadKind::kInvalid: 6629 LOG(FATAL) << "UNREACHABLE"; 6630 UNREACHABLE(); 6631 } 6632 6633 if (generate_null_check || cls->MustGenerateClinitCheck()) { 6634 DCHECK(cls->CanCallRuntime()); 6635 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls); 6636 codegen_->AddSlowPath(slow_path); 6637 6638 if (generate_null_check) { 6639 __ testl(out, out); 6640 __ j(kEqual, slow_path->GetEntryLabel()); 6641 } 6642 6643 if (cls->MustGenerateClinitCheck()) { 6644 GenerateClassInitializationCheck(slow_path, out); 6645 } else { 6646 __ Bind(slow_path->GetExitLabel()); 6647 } 6648 } 6649 } 6650 6651 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) { 6652 InvokeRuntimeCallingConvention calling_convention; 6653 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); 6654 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); 6655 } 6656 6657 void 
InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) { 6658 codegen_->GenerateLoadMethodHandleRuntimeCall(load); 6659 } 6660 6661 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) { 6662 InvokeRuntimeCallingConvention calling_convention; 6663 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); 6664 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); 6665 } 6666 6667 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) { 6668 codegen_->GenerateLoadMethodTypeRuntimeCall(load); 6669 } 6670 6671 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) { 6672 LocationSummary* locations = 6673 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 6674 locations->SetInAt(0, Location::RequiresRegister()); 6675 if (check->HasUses()) { 6676 locations->SetOut(Location::SameAsFirstInput()); 6677 } 6678 // Rely on the type initialization to save everything we need. 6679 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 6680 } 6681 6682 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { 6683 // We assume the class to not be null. 6684 SlowPathCode* slow_path = 6685 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check); 6686 codegen_->AddSlowPath(slow_path); 6687 GenerateClassInitializationCheck(slow_path, 6688 check->GetLocations()->InAt(0).AsRegister<Register>()); 6689 } 6690 6691 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( 6692 SlowPathCode* slow_path, Register class_reg) { 6693 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 6694 const size_t status_byte_offset = 6695 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 6696 constexpr uint32_t shifted_initialized_value = 6697 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); 6698 6699 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); 6700 __ j(kBelow, slow_path->GetEntryLabel()); 6701 __ Bind(slow_path->GetExitLabel()); 6702 // No need for memory fence, thanks to the X86 memory model. 6703 } 6704 6705 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, 6706 Register temp) { 6707 uint32_t path_to_root = check->GetBitstringPathToRoot(); 6708 uint32_t mask = check->GetBitstringMask(); 6709 DCHECK(IsPowerOfTwo(mask + 1)); 6710 size_t mask_bits = WhichPowerOf2(mask + 1); 6711 6712 if (mask_bits == 16u) { 6713 // Compare the bitstring in memory. 6714 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); 6715 } else { 6716 // /* uint32_t */ temp = temp->status_ 6717 __ movl(temp, Address(temp, mirror::Class::StatusOffset())); 6718 // Compare the bitstring bits using SUB. 6719 __ subl(temp, Immediate(path_to_root)); 6720 // Shift out bits that do not contribute to the comparison. 
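// The shift below also leaves the flags set for the caller's branch: after
// shifting left by (32 - mask_bits), the result is zero exactly when the low
// mask_bits bits of (status - path_to_root) are zero, i.e. when
// (status & mask) == path_to_root. For example, with mask == 0xFF
// (mask_bits == 8) the value is shifted left by 24 and only those 8 bits
// decide the subsequent kEqual/kNotEqual test.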
6721 __ shll(temp, Immediate(32u - mask_bits)); 6722 } 6723 } 6724 6725 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( 6726 HLoadString::LoadKind desired_string_load_kind) { 6727 switch (desired_string_load_kind) { 6728 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 6729 case HLoadString::LoadKind::kBootImageRelRo: 6730 case HLoadString::LoadKind::kBssEntry: 6731 DCHECK(!Runtime::Current()->UseJitCompilation()); 6732 break; 6733 case HLoadString::LoadKind::kJitBootImageAddress: 6734 case HLoadString::LoadKind::kJitTableAddress: 6735 DCHECK(Runtime::Current()->UseJitCompilation()); 6736 break; 6737 case HLoadString::LoadKind::kRuntimeCall: 6738 break; 6739 } 6740 return desired_string_load_kind; 6741 } 6742 6743 void LocationsBuilderX86::VisitLoadString(HLoadString* load) { 6744 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 6745 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); 6746 HLoadString::LoadKind load_kind = load->GetLoadKind(); 6747 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || 6748 load_kind == HLoadString::LoadKind::kBootImageRelRo || 6749 load_kind == HLoadString::LoadKind::kBssEntry) { 6750 locations->SetInAt(0, Location::RequiresRegister()); 6751 } 6752 if (load_kind == HLoadString::LoadKind::kRuntimeCall) { 6753 locations->SetOut(Location::RegisterLocation(EAX)); 6754 } else { 6755 locations->SetOut(Location::RequiresRegister()); 6756 if (load_kind == HLoadString::LoadKind::kBssEntry) { 6757 if (!kUseReadBarrier || kUseBakerReadBarrier) { 6758 // Rely on the pResolveString to save everything. 6759 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 6760 } else { 6761 // For non-Baker read barrier we have a temp-clobbering call. 6762 } 6763 } 6764 } 6765 } 6766 6767 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file, 6768 dex::StringIndex string_index, 6769 Handle<mirror::String> handle) { 6770 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); 6771 // Add a patch entry and return the label. 6772 jit_string_patches_.emplace_back(&dex_file, string_index.index_); 6773 PatchInfo<Label>* info = &jit_string_patches_.back(); 6774 return &info->label; 6775 } 6776 6777 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 6778 // move. 
6779 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { 6780 LocationSummary* locations = load->GetLocations(); 6781 Location out_loc = locations->Out(); 6782 Register out = out_loc.AsRegister<Register>(); 6783 6784 switch (load->GetLoadKind()) { 6785 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { 6786 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 6787 Register method_address = locations->InAt(0).AsRegister<Register>(); 6788 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); 6789 codegen_->RecordBootImageStringPatch(load); 6790 return; 6791 } 6792 case HLoadString::LoadKind::kBootImageRelRo: { 6793 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 6794 Register method_address = locations->InAt(0).AsRegister<Register>(); 6795 __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); 6796 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(), 6797 codegen_->GetBootImageOffset(load)); 6798 return; 6799 } 6800 case HLoadString::LoadKind::kBssEntry: { 6801 Register method_address = locations->InAt(0).AsRegister<Register>(); 6802 Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset); 6803 Label* fixup_label = codegen_->NewStringBssEntryPatch(load); 6804 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ 6805 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); 6806 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load); 6807 codegen_->AddSlowPath(slow_path); 6808 __ testl(out, out); 6809 __ j(kEqual, slow_path->GetEntryLabel()); 6810 __ Bind(slow_path->GetExitLabel()); 6811 return; 6812 } 6813 case HLoadString::LoadKind::kJitBootImageAddress: { 6814 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); 6815 DCHECK_NE(address, 0u); 6816 __ movl(out, Immediate(address)); 6817 return; 6818 } 6819 case HLoadString::LoadKind::kJitTableAddress: { 6820 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset); 6821 Label* fixup_label = codegen_->NewJitRootStringPatch( 6822 load->GetDexFile(), load->GetStringIndex(), load->GetString()); 6823 // /* GcRoot<mirror::String> */ out = *address 6824 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); 6825 return; 6826 } 6827 default: 6828 break; 6829 } 6830 6831 // TODO: Re-add the compiler code to do string dex cache lookup again. 
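// Fallback for the remaining kRuntimeCall case: the string index goes in the
// first runtime argument register (asserted below to be the output register)
// and the resolved String comes back in EAX, the declared output.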
6832 InvokeRuntimeCallingConvention calling_convention; 6833 DCHECK_EQ(calling_convention.GetRegisterAt(0), out); 6834 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_)); 6835 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); 6836 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 6837 } 6838 6839 static Address GetExceptionTlsAddress() { 6840 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value()); 6841 } 6842 6843 void LocationsBuilderX86::VisitLoadException(HLoadException* load) { 6844 LocationSummary* locations = 6845 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); 6846 locations->SetOut(Location::RequiresRegister()); 6847 } 6848 6849 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) { 6850 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress()); 6851 } 6852 6853 void LocationsBuilderX86::VisitClearException(HClearException* clear) { 6854 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); 6855 } 6856 6857 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 6858 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0)); 6859 } 6860 6861 void LocationsBuilderX86::VisitThrow(HThrow* instruction) { 6862 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6863 instruction, LocationSummary::kCallOnMainOnly); 6864 InvokeRuntimeCallingConvention calling_convention; 6865 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 6866 } 6867 6868 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { 6869 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 6870 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 6871 } 6872 6873 // Temp is used for read barrier. 6874 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { 6875 if (kEmitCompilerReadBarrier && 6876 !kUseBakerReadBarrier && 6877 (type_check_kind == TypeCheckKind::kAbstractClassCheck || 6878 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 6879 type_check_kind == TypeCheckKind::kArrayObjectCheck)) { 6880 return 1; 6881 } 6882 return 0; 6883 } 6884 6885 // Interface case has 2 temps, one for holding the number of interfaces, one for the current 6886 // interface pointer, the current interface is compared in memory. 6887 // The other checks have one temp for loading the object's class. 6888 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { 6889 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 6890 return 2; 6891 } 6892 return 1 + NumberOfInstanceOfTemps(type_check_kind); 6893 } 6894 6895 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { 6896 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 6897 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6898 bool baker_read_barrier_slow_path = false; 6899 switch (type_check_kind) { 6900 case TypeCheckKind::kExactCheck: 6901 case TypeCheckKind::kAbstractClassCheck: 6902 case TypeCheckKind::kClassHierarchyCheck: 6903 case TypeCheckKind::kArrayObjectCheck: { 6904 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); 6905 call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 6906 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; 6907 break; 6908 } 6909 case TypeCheckKind::kArrayCheck: 6910 case TypeCheckKind::kUnresolvedCheck: 6911 case TypeCheckKind::kInterfaceCheck: 6912 call_kind = LocationSummary::kCallOnSlowPath; 6913 break; 6914 case TypeCheckKind::kBitstringCheck: 6915 break; 6916 } 6917 6918 LocationSummary* locations = 6919 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 6920 if (baker_read_barrier_slow_path) { 6921 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 6922 } 6923 locations->SetInAt(0, Location::RequiresRegister()); 6924 if (type_check_kind == TypeCheckKind::kBitstringCheck) { 6925 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 6926 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 6927 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 6928 } else { 6929 locations->SetInAt(1, Location::Any()); 6930 } 6931 // Note that TypeCheckSlowPathX86 uses this "out" register too. 6932 locations->SetOut(Location::RequiresRegister()); 6933 // When read barriers are enabled, we need a temporary register for some cases. 6934 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); 6935 } 6936 6937 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { 6938 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6939 LocationSummary* locations = instruction->GetLocations(); 6940 Location obj_loc = locations->InAt(0); 6941 Register obj = obj_loc.AsRegister<Register>(); 6942 Location cls = locations->InAt(1); 6943 Location out_loc = locations->Out(); 6944 Register out = out_loc.AsRegister<Register>(); 6945 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); 6946 DCHECK_LE(num_temps, 1u); 6947 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); 6948 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 6949 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 6950 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 6951 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 6952 SlowPathCode* slow_path = nullptr; 6953 NearLabel done, zero; 6954 6955 // Return 0 if `obj` is null. 6956 // Avoid null check if we know obj is not null. 6957 if (instruction->MustDoNullCheck()) { 6958 __ testl(obj, obj); 6959 __ j(kEqual, &zero); 6960 } 6961 6962 switch (type_check_kind) { 6963 case TypeCheckKind::kExactCheck: { 6964 ReadBarrierOption read_barrier_option = 6965 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6966 // /* HeapReference<Class> */ out = obj->klass_ 6967 GenerateReferenceLoadTwoRegisters(instruction, 6968 out_loc, 6969 obj_loc, 6970 class_offset, 6971 read_barrier_option); 6972 if (cls.IsRegister()) { 6973 __ cmpl(out, cls.AsRegister<Register>()); 6974 } else { 6975 DCHECK(cls.IsStackSlot()) << cls; 6976 __ cmpl(out, Address(ESP, cls.GetStackIndex())); 6977 } 6978 6979 // Classes must be equal for the instanceof to succeed. 
6980 __ j(kNotEqual, &zero); 6981 __ movl(out, Immediate(1)); 6982 __ jmp(&done); 6983 break; 6984 } 6985 6986 case TypeCheckKind::kAbstractClassCheck: { 6987 ReadBarrierOption read_barrier_option = 6988 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6989 // /* HeapReference<Class> */ out = obj->klass_ 6990 GenerateReferenceLoadTwoRegisters(instruction, 6991 out_loc, 6992 obj_loc, 6993 class_offset, 6994 read_barrier_option); 6995 // If the class is abstract, we eagerly fetch the super class of the 6996 // object to avoid doing a comparison we know will fail. 6997 NearLabel loop; 6998 __ Bind(&loop); 6999 // /* HeapReference<Class> */ out = out->super_class_ 7000 GenerateReferenceLoadOneRegister(instruction, 7001 out_loc, 7002 super_offset, 7003 maybe_temp_loc, 7004 read_barrier_option); 7005 __ testl(out, out); 7006 // If `out` is null, we use it for the result, and jump to `done`. 7007 __ j(kEqual, &done); 7008 if (cls.IsRegister()) { 7009 __ cmpl(out, cls.AsRegister<Register>()); 7010 } else { 7011 DCHECK(cls.IsStackSlot()) << cls; 7012 __ cmpl(out, Address(ESP, cls.GetStackIndex())); 7013 } 7014 __ j(kNotEqual, &loop); 7015 __ movl(out, Immediate(1)); 7016 if (zero.IsLinked()) { 7017 __ jmp(&done); 7018 } 7019 break; 7020 } 7021 7022 case TypeCheckKind::kClassHierarchyCheck: { 7023 ReadBarrierOption read_barrier_option = 7024 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 7025 // /* HeapReference<Class> */ out = obj->klass_ 7026 GenerateReferenceLoadTwoRegisters(instruction, 7027 out_loc, 7028 obj_loc, 7029 class_offset, 7030 read_barrier_option); 7031 // Walk over the class hierarchy to find a match. 7032 NearLabel loop, success; 7033 __ Bind(&loop); 7034 if (cls.IsRegister()) { 7035 __ cmpl(out, cls.AsRegister<Register>()); 7036 } else { 7037 DCHECK(cls.IsStackSlot()) << cls; 7038 __ cmpl(out, Address(ESP, cls.GetStackIndex())); 7039 } 7040 __ j(kEqual, &success); 7041 // /* HeapReference<Class> */ out = out->super_class_ 7042 GenerateReferenceLoadOneRegister(instruction, 7043 out_loc, 7044 super_offset, 7045 maybe_temp_loc, 7046 read_barrier_option); 7047 __ testl(out, out); 7048 __ j(kNotEqual, &loop); 7049 // If `out` is null, we use it for the result, and jump to `done`. 7050 __ jmp(&done); 7051 __ Bind(&success); 7052 __ movl(out, Immediate(1)); 7053 if (zero.IsLinked()) { 7054 __ jmp(&done); 7055 } 7056 break; 7057 } 7058 7059 case TypeCheckKind::kArrayObjectCheck: { 7060 ReadBarrierOption read_barrier_option = 7061 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 7062 // /* HeapReference<Class> */ out = obj->klass_ 7063 GenerateReferenceLoadTwoRegisters(instruction, 7064 out_loc, 7065 obj_loc, 7066 class_offset, 7067 read_barrier_option); 7068 // Do an exact check. 7069 NearLabel exact_check; 7070 if (cls.IsRegister()) { 7071 __ cmpl(out, cls.AsRegister<Register>()); 7072 } else { 7073 DCHECK(cls.IsStackSlot()) << cls; 7074 __ cmpl(out, Address(ESP, cls.GetStackIndex())); 7075 } 7076 __ j(kEqual, &exact_check); 7077 // Otherwise, we need to check that the object's class is a non-primitive array. 7078 // /* HeapReference<Class> */ out = out->component_type_ 7079 GenerateReferenceLoadOneRegister(instruction, 7080 out_loc, 7081 component_offset, 7082 maybe_temp_loc, 7083 read_barrier_option); 7084 __ testl(out, out); 7085 // If `out` is null, we use it for the result, and jump to `done`. 
7086 __ j(kEqual, &done); 7087 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot)); 7088 __ j(kNotEqual, &zero); 7089 __ Bind(&exact_check); 7090 __ movl(out, Immediate(1)); 7091 __ jmp(&done); 7092 break; 7093 } 7094 7095 case TypeCheckKind::kArrayCheck: { 7096 // No read barrier since the slow path will retry upon failure. 7097 // /* HeapReference<Class> */ out = obj->klass_ 7098 GenerateReferenceLoadTwoRegisters(instruction, 7099 out_loc, 7100 obj_loc, 7101 class_offset, 7102 kWithoutReadBarrier); 7103 if (cls.IsRegister()) { 7104 __ cmpl(out, cls.AsRegister<Register>()); 7105 } else { 7106 DCHECK(cls.IsStackSlot()) << cls; 7107 __ cmpl(out, Address(ESP, cls.GetStackIndex())); 7108 } 7109 DCHECK(locations->OnlyCallsOnSlowPath()); 7110 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( 7111 instruction, /* is_fatal= */ false); 7112 codegen_->AddSlowPath(slow_path); 7113 __ j(kNotEqual, slow_path->GetEntryLabel()); 7114 __ movl(out, Immediate(1)); 7115 if (zero.IsLinked()) { 7116 __ jmp(&done); 7117 } 7118 break; 7119 } 7120 7121 case TypeCheckKind::kUnresolvedCheck: 7122 case TypeCheckKind::kInterfaceCheck: { 7123 // Note that we indeed only call on slow path, but we always go 7124 // into the slow path for the unresolved and interface check 7125 // cases. 7126 // 7127 // We cannot directly call the InstanceofNonTrivial runtime 7128 // entry point without resorting to a type checking slow path 7129 // here (i.e. by calling InvokeRuntime directly), as it would 7130 // require to assign fixed registers for the inputs of this 7131 // HInstanceOf instruction (following the runtime calling 7132 // convention), which might be cluttered by the potential first 7133 // read barrier emission at the beginning of this method. 7134 // 7135 // TODO: Introduce a new runtime entry point taking the object 7136 // to test (instead of its class) as argument, and let it deal 7137 // with the read barrier issues. This will let us refactor this 7138 // case of the `switch` code as it was previously (with a direct 7139 // call to the runtime not using a type checking slow path). 7140 // This should also be beneficial for the other cases above. 
7141 DCHECK(locations->OnlyCallsOnSlowPath()); 7142 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( 7143 instruction, /* is_fatal= */ false); 7144 codegen_->AddSlowPath(slow_path); 7145 __ jmp(slow_path->GetEntryLabel()); 7146 if (zero.IsLinked()) { 7147 __ jmp(&done); 7148 } 7149 break; 7150 } 7151 7152 case TypeCheckKind::kBitstringCheck: { 7153 // /* HeapReference<Class> */ temp = obj->klass_ 7154 GenerateReferenceLoadTwoRegisters(instruction, 7155 out_loc, 7156 obj_loc, 7157 class_offset, 7158 kWithoutReadBarrier); 7159 7160 GenerateBitstringTypeCheckCompare(instruction, out); 7161 __ j(kNotEqual, &zero); 7162 __ movl(out, Immediate(1)); 7163 __ jmp(&done); 7164 break; 7165 } 7166 } 7167 7168 if (zero.IsLinked()) { 7169 __ Bind(&zero); 7170 __ xorl(out, out); 7171 } 7172 7173 if (done.IsLinked()) { 7174 __ Bind(&done); 7175 } 7176 7177 if (slow_path != nullptr) { 7178 __ Bind(slow_path->GetExitLabel()); 7179 } 7180 } 7181 7182 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { 7183 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 7184 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); 7185 LocationSummary* locations = 7186 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 7187 locations->SetInAt(0, Location::RequiresRegister()); 7188 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 7189 // Require a register for the interface check since there is a loop that compares the class to 7190 // a memory address. 7191 locations->SetInAt(1, Location::RequiresRegister()); 7192 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { 7193 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 7194 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 7195 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 7196 } else { 7197 locations->SetInAt(1, Location::Any()); 7198 } 7199 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. 7200 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); 7201 } 7202 7203 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { 7204 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 7205 LocationSummary* locations = instruction->GetLocations(); 7206 Location obj_loc = locations->InAt(0); 7207 Register obj = obj_loc.AsRegister<Register>(); 7208 Location cls = locations->InAt(1); 7209 Location temp_loc = locations->GetTemp(0); 7210 Register temp = temp_loc.AsRegister<Register>(); 7211 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); 7212 DCHECK_GE(num_temps, 1u); 7213 DCHECK_LE(num_temps, 2u); 7214 Location maybe_temp2_loc = (num_temps >= 2) ? 
locations->GetTemp(1) : Location::NoLocation(); 7215 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 7216 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 7217 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 7218 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 7219 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 7220 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 7221 const uint32_t object_array_data_offset = 7222 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 7223 7224 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); 7225 SlowPathCode* type_check_slow_path = 7226 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( 7227 instruction, is_type_check_slow_path_fatal); 7228 codegen_->AddSlowPath(type_check_slow_path); 7229 7230 NearLabel done; 7231 // Avoid null check if we know obj is not null. 7232 if (instruction->MustDoNullCheck()) { 7233 __ testl(obj, obj); 7234 __ j(kEqual, &done); 7235 } 7236 7237 switch (type_check_kind) { 7238 case TypeCheckKind::kExactCheck: 7239 case TypeCheckKind::kArrayCheck: { 7240 // /* HeapReference<Class> */ temp = obj->klass_ 7241 GenerateReferenceLoadTwoRegisters(instruction, 7242 temp_loc, 7243 obj_loc, 7244 class_offset, 7245 kWithoutReadBarrier); 7246 7247 if (cls.IsRegister()) { 7248 __ cmpl(temp, cls.AsRegister<Register>()); 7249 } else { 7250 DCHECK(cls.IsStackSlot()) << cls; 7251 __ cmpl(temp, Address(ESP, cls.GetStackIndex())); 7252 } 7253 // Jump to slow path for throwing the exception or doing a 7254 // more involved array check. 7255 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 7256 break; 7257 } 7258 7259 case TypeCheckKind::kAbstractClassCheck: { 7260 // /* HeapReference<Class> */ temp = obj->klass_ 7261 GenerateReferenceLoadTwoRegisters(instruction, 7262 temp_loc, 7263 obj_loc, 7264 class_offset, 7265 kWithoutReadBarrier); 7266 7267 // If the class is abstract, we eagerly fetch the super class of the 7268 // object to avoid doing a comparison we know will fail. 7269 NearLabel loop; 7270 __ Bind(&loop); 7271 // /* HeapReference<Class> */ temp = temp->super_class_ 7272 GenerateReferenceLoadOneRegister(instruction, 7273 temp_loc, 7274 super_offset, 7275 maybe_temp2_loc, 7276 kWithoutReadBarrier); 7277 7278 // If the class reference currently in `temp` is null, jump to the slow path to throw the 7279 // exception. 7280 __ testl(temp, temp); 7281 __ j(kZero, type_check_slow_path->GetEntryLabel()); 7282 7283 // Otherwise, compare the classes 7284 if (cls.IsRegister()) { 7285 __ cmpl(temp, cls.AsRegister<Register>()); 7286 } else { 7287 DCHECK(cls.IsStackSlot()) << cls; 7288 __ cmpl(temp, Address(ESP, cls.GetStackIndex())); 7289 } 7290 __ j(kNotEqual, &loop); 7291 break; 7292 } 7293 7294 case TypeCheckKind::kClassHierarchyCheck: { 7295 // /* HeapReference<Class> */ temp = obj->klass_ 7296 GenerateReferenceLoadTwoRegisters(instruction, 7297 temp_loc, 7298 obj_loc, 7299 class_offset, 7300 kWithoutReadBarrier); 7301 7302 // Walk over the class hierarchy to find a match. 
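// Each iteration compares `temp` with the target class; on a match we are
// done, otherwise `temp` is replaced by its super class. Reaching a null
// super class means the cast cannot succeed, so we throw via the slow path.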
7303 NearLabel loop; 7304 __ Bind(&loop); 7305 if (cls.IsRegister()) { 7306 __ cmpl(temp, cls.AsRegister<Register>()); 7307 } else { 7308 DCHECK(cls.IsStackSlot()) << cls; 7309 __ cmpl(temp, Address(ESP, cls.GetStackIndex())); 7310 } 7311 __ j(kEqual, &done); 7312 7313 // /* HeapReference<Class> */ temp = temp->super_class_ 7314 GenerateReferenceLoadOneRegister(instruction, 7315 temp_loc, 7316 super_offset, 7317 maybe_temp2_loc, 7318 kWithoutReadBarrier); 7319 7320 // If the class reference currently in `temp` is not null, jump 7321 // back at the beginning of the loop. 7322 __ testl(temp, temp); 7323 __ j(kNotZero, &loop); 7324 // Otherwise, jump to the slow path to throw the exception.; 7325 __ jmp(type_check_slow_path->GetEntryLabel()); 7326 break; 7327 } 7328 7329 case TypeCheckKind::kArrayObjectCheck: { 7330 // /* HeapReference<Class> */ temp = obj->klass_ 7331 GenerateReferenceLoadTwoRegisters(instruction, 7332 temp_loc, 7333 obj_loc, 7334 class_offset, 7335 kWithoutReadBarrier); 7336 7337 // Do an exact check. 7338 if (cls.IsRegister()) { 7339 __ cmpl(temp, cls.AsRegister<Register>()); 7340 } else { 7341 DCHECK(cls.IsStackSlot()) << cls; 7342 __ cmpl(temp, Address(ESP, cls.GetStackIndex())); 7343 } 7344 __ j(kEqual, &done); 7345 7346 // Otherwise, we need to check that the object's class is a non-primitive array. 7347 // /* HeapReference<Class> */ temp = temp->component_type_ 7348 GenerateReferenceLoadOneRegister(instruction, 7349 temp_loc, 7350 component_offset, 7351 maybe_temp2_loc, 7352 kWithoutReadBarrier); 7353 7354 // If the component type is null (i.e. the object not an array), jump to the slow path to 7355 // throw the exception. Otherwise proceed with the check. 7356 __ testl(temp, temp); 7357 __ j(kZero, type_check_slow_path->GetEntryLabel()); 7358 7359 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); 7360 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 7361 break; 7362 } 7363 7364 case TypeCheckKind::kUnresolvedCheck: 7365 // We always go into the type check slow path for the unresolved check case. 7366 // We cannot directly call the CheckCast runtime entry point 7367 // without resorting to a type checking slow path here (i.e. by 7368 // calling InvokeRuntime directly), as it would require to 7369 // assign fixed registers for the inputs of this HInstanceOf 7370 // instruction (following the runtime calling convention), which 7371 // might be cluttered by the potential first read barrier 7372 // emission at the beginning of this method. 7373 __ jmp(type_check_slow_path->GetEntryLabel()); 7374 break; 7375 7376 case TypeCheckKind::kInterfaceCheck: { 7377 // Fast path for the interface check. Try to avoid read barriers to improve the fast path. 7378 // We can not get false positives by doing this. 7379 // /* HeapReference<Class> */ temp = obj->klass_ 7380 GenerateReferenceLoadTwoRegisters(instruction, 7381 temp_loc, 7382 obj_loc, 7383 class_offset, 7384 kWithoutReadBarrier); 7385 7386 // /* HeapReference<Class> */ temp = temp->iftable_ 7387 GenerateReferenceLoadTwoRegisters(instruction, 7388 temp_loc, 7389 temp_loc, 7390 iftable_offset, 7391 kWithoutReadBarrier); 7392 // Iftable is never null. 7393 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset)); 7394 // Maybe poison the `cls` for direct comparison with memory. 7395 __ MaybePoisonHeapReference(cls.AsRegister<Register>()); 7396 // Loop through the iftable and check if any class matches. 
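// The if-table is an object array holding (interface class, method array)
// pairs, so each iteration steps the index down by 2 slots and compares `cls`
// against the interface class stored at the even slot.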
7397 NearLabel start_loop; 7398 __ Bind(&start_loop); 7399 // Need to subtract first to handle the empty array case. 7400 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2)); 7401 __ j(kNegative, type_check_slow_path->GetEntryLabel()); 7402 // Go to next interface if the classes do not match. 7403 __ cmpl(cls.AsRegister<Register>(), 7404 CodeGeneratorX86::ArrayAddress(temp, 7405 maybe_temp2_loc, 7406 TIMES_4, 7407 object_array_data_offset)); 7408 __ j(kNotEqual, &start_loop); 7409 // If `cls` was poisoned above, unpoison it. 7410 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); 7411 break; 7412 } 7413 7414 case TypeCheckKind::kBitstringCheck: { 7415 // /* HeapReference<Class> */ temp = obj->klass_ 7416 GenerateReferenceLoadTwoRegisters(instruction, 7417 temp_loc, 7418 obj_loc, 7419 class_offset, 7420 kWithoutReadBarrier); 7421 7422 GenerateBitstringTypeCheckCompare(instruction, temp); 7423 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 7424 break; 7425 } 7426 } 7427 __ Bind(&done); 7428 7429 __ Bind(type_check_slow_path->GetExitLabel()); 7430 } 7431 7432 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) { 7433 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 7434 instruction, LocationSummary::kCallOnMainOnly); 7435 InvokeRuntimeCallingConvention calling_convention; 7436 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 7437 } 7438 7439 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) { 7440 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject 7441 : kQuickUnlockObject, 7442 instruction, 7443 instruction->GetDexPc()); 7444 if (instruction->IsEnter()) { 7445 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 7446 } else { 7447 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 7448 } 7449 } 7450 7451 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) { 7452 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); 7453 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); 7454 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 7455 locations->SetInAt(0, Location::RequiresRegister()); 7456 locations->SetInAt(1, Location::RequiresRegister()); 7457 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 7458 } 7459 7460 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) { 7461 LocationSummary* locations = instruction->GetLocations(); 7462 Location first = locations->InAt(0); 7463 Location second = locations->InAt(1); 7464 Location dest = locations->Out(); 7465 if (instruction->GetResultType() == DataType::Type::kInt32) { 7466 __ andn(dest.AsRegister<Register>(), 7467 first.AsRegister<Register>(), 7468 second.AsRegister<Register>()); 7469 } else { 7470 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 7471 __ andn(dest.AsRegisterPairLow<Register>(), 7472 first.AsRegisterPairLow<Register>(), 7473 second.AsRegisterPairLow<Register>()); 7474 __ andn(dest.AsRegisterPairHigh<Register>(), 7475 first.AsRegisterPairHigh<Register>(), 7476 second.AsRegisterPairHigh<Register>()); 7477 } 7478 } 7479 7480 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { 7481 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); 7482 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType(); 7483 
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 7484 locations->SetInAt(0, Location::RequiresRegister()); 7485 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 7486 } 7487 7488 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit( 7489 HX86MaskOrResetLeastSetBit* instruction) { 7490 LocationSummary* locations = instruction->GetLocations(); 7491 Location src = locations->InAt(0); 7492 Location dest = locations->Out(); 7493 DCHECK(instruction->GetResultType() == DataType::Type::kInt32); 7494 switch (instruction->GetOpKind()) { 7495 case HInstruction::kAnd: 7496 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>()); 7497 break; 7498 case HInstruction::kXor: 7499 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>()); 7500 break; 7501 default: 7502 LOG(FATAL) << "Unreachable"; 7503 } 7504 } 7505 7506 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } 7507 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } 7508 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } 7509 7510 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) { 7511 LocationSummary* locations = 7512 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 7513 DCHECK(instruction->GetResultType() == DataType::Type::kInt32 7514 || instruction->GetResultType() == DataType::Type::kInt64); 7515 locations->SetInAt(0, Location::RequiresRegister()); 7516 locations->SetInAt(1, Location::Any()); 7517 locations->SetOut(Location::SameAsFirstInput()); 7518 } 7519 7520 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) { 7521 HandleBitwiseOperation(instruction); 7522 } 7523 7524 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) { 7525 HandleBitwiseOperation(instruction); 7526 } 7527 7528 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) { 7529 HandleBitwiseOperation(instruction); 7530 } 7531 7532 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) { 7533 LocationSummary* locations = instruction->GetLocations(); 7534 Location first = locations->InAt(0); 7535 Location second = locations->InAt(1); 7536 DCHECK(first.Equals(locations->Out())); 7537 7538 if (instruction->GetResultType() == DataType::Type::kInt32) { 7539 if (second.IsRegister()) { 7540 if (instruction->IsAnd()) { 7541 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>()); 7542 } else if (instruction->IsOr()) { 7543 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>()); 7544 } else { 7545 DCHECK(instruction->IsXor()); 7546 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>()); 7547 } 7548 } else if (second.IsConstant()) { 7549 if (instruction->IsAnd()) { 7550 __ andl(first.AsRegister<Register>(), 7551 Immediate(second.GetConstant()->AsIntConstant()->GetValue())); 7552 } else if (instruction->IsOr()) { 7553 __ orl(first.AsRegister<Register>(), 7554 Immediate(second.GetConstant()->AsIntConstant()->GetValue())); 7555 } else { 7556 DCHECK(instruction->IsXor()); 7557 __ xorl(first.AsRegister<Register>(), 7558 Immediate(second.GetConstant()->AsIntConstant()->GetValue())); 7559 } 7560 } else { 7561 if (instruction->IsAnd()) { 7562 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); 7563 } else if (instruction->IsOr()) { 7564 __ orl(first.AsRegister<Register>(), 
Address(ESP, second.GetStackIndex())); 7565 } else { 7566 DCHECK(instruction->IsXor()); 7567 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); 7568 } 7569 } 7570 } else { 7571 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 7572 if (second.IsRegisterPair()) { 7573 if (instruction->IsAnd()) { 7574 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); 7575 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); 7576 } else if (instruction->IsOr()) { 7577 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); 7578 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); 7579 } else { 7580 DCHECK(instruction->IsXor()); 7581 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); 7582 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); 7583 } 7584 } else if (second.IsDoubleStackSlot()) { 7585 if (instruction->IsAnd()) { 7586 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); 7587 __ andl(first.AsRegisterPairHigh<Register>(), 7588 Address(ESP, second.GetHighStackIndex(kX86WordSize))); 7589 } else if (instruction->IsOr()) { 7590 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); 7591 __ orl(first.AsRegisterPairHigh<Register>(), 7592 Address(ESP, second.GetHighStackIndex(kX86WordSize))); 7593 } else { 7594 DCHECK(instruction->IsXor()); 7595 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); 7596 __ xorl(first.AsRegisterPairHigh<Register>(), 7597 Address(ESP, second.GetHighStackIndex(kX86WordSize))); 7598 } 7599 } else { 7600 DCHECK(second.IsConstant()) << second; 7601 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 7602 int32_t low_value = Low32Bits(value); 7603 int32_t high_value = High32Bits(value); 7604 Immediate low(low_value); 7605 Immediate high(high_value); 7606 Register first_low = first.AsRegisterPairLow<Register>(); 7607 Register first_high = first.AsRegisterPairHigh<Register>(); 7608 if (instruction->IsAnd()) { 7609 if (low_value == 0) { 7610 __ xorl(first_low, first_low); 7611 } else if (low_value != -1) { 7612 __ andl(first_low, low); 7613 } 7614 if (high_value == 0) { 7615 __ xorl(first_high, first_high); 7616 } else if (high_value != -1) { 7617 __ andl(first_high, high); 7618 } 7619 } else if (instruction->IsOr()) { 7620 if (low_value != 0) { 7621 __ orl(first_low, low); 7622 } 7623 if (high_value != 0) { 7624 __ orl(first_high, high); 7625 } 7626 } else { 7627 DCHECK(instruction->IsXor()); 7628 if (low_value != 0) { 7629 __ xorl(first_low, low); 7630 } 7631 if (high_value != 0) { 7632 __ xorl(first_high, high); 7633 } 7634 } 7635 } 7636 } 7637 } 7638 7639 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister( 7640 HInstruction* instruction, 7641 Location out, 7642 uint32_t offset, 7643 Location maybe_temp, 7644 ReadBarrierOption read_barrier_option) { 7645 Register out_reg = out.AsRegister<Register>(); 7646 if (read_barrier_option == kWithReadBarrier) { 7647 CHECK(kEmitCompilerReadBarrier); 7648 if (kUseBakerReadBarrier) { 7649 // Load with fast path based Baker's read barrier. 7650 // /* HeapReference<Object> */ out = *(out + offset) 7651 codegen_->GenerateFieldLoadWithBakerReadBarrier( 7652 instruction, out, out_reg, offset, /* needs_null_check= */ false); 7653 } else { 7654 // Load with slow path based read barrier. 
7655 // Save the value of `out` into `maybe_temp` before overwriting it 7656 // in the following move operation, as we will need it for the 7657 // read barrier below. 7658 DCHECK(maybe_temp.IsRegister()) << maybe_temp; 7659 __ movl(maybe_temp.AsRegister<Register>(), out_reg); 7660 // /* HeapReference<Object> */ out = *(out + offset) 7661 __ movl(out_reg, Address(out_reg, offset)); 7662 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); 7663 } 7664 } else { 7665 // Plain load with no read barrier. 7666 // /* HeapReference<Object> */ out = *(out + offset) 7667 __ movl(out_reg, Address(out_reg, offset)); 7668 __ MaybeUnpoisonHeapReference(out_reg); 7669 } 7670 } 7671 7672 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters( 7673 HInstruction* instruction, 7674 Location out, 7675 Location obj, 7676 uint32_t offset, 7677 ReadBarrierOption read_barrier_option) { 7678 Register out_reg = out.AsRegister<Register>(); 7679 Register obj_reg = obj.AsRegister<Register>(); 7680 if (read_barrier_option == kWithReadBarrier) { 7681 CHECK(kEmitCompilerReadBarrier); 7682 if (kUseBakerReadBarrier) { 7683 // Load with fast path based Baker's read barrier. 7684 // /* HeapReference<Object> */ out = *(obj + offset) 7685 codegen_->GenerateFieldLoadWithBakerReadBarrier( 7686 instruction, out, obj_reg, offset, /* needs_null_check= */ false); 7687 } else { 7688 // Load with slow path based read barrier. 7689 // /* HeapReference<Object> */ out = *(obj + offset) 7690 __ movl(out_reg, Address(obj_reg, offset)); 7691 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); 7692 } 7693 } else { 7694 // Plain load with no read barrier. 7695 // /* HeapReference<Object> */ out = *(obj + offset) 7696 __ movl(out_reg, Address(obj_reg, offset)); 7697 __ MaybeUnpoisonHeapReference(out_reg); 7698 } 7699 } 7700 7701 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad( 7702 HInstruction* instruction, 7703 Location root, 7704 const Address& address, 7705 Label* fixup_label, 7706 ReadBarrierOption read_barrier_option) { 7707 Register root_reg = root.AsRegister<Register>(); 7708 if (read_barrier_option == kWithReadBarrier) { 7709 DCHECK(kEmitCompilerReadBarrier); 7710 if (kUseBakerReadBarrier) { 7711 // Fast path implementation of art::ReadBarrier::BarrierForRoot when 7712 // Baker's read barriers are used: 7713 // 7714 // root = obj.field; 7715 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() 7716 // if (temp != null) { 7717 // root = temp(root) 7718 // } 7719 7720 // /* GcRoot<mirror::Object> */ root = *address 7721 __ movl(root_reg, address); 7722 if (fixup_label != nullptr) { 7723 __ Bind(fixup_label); 7724 } 7725 static_assert( 7726 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), 7727 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " 7728 "have different sizes."); 7729 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), 7730 "art::mirror::CompressedReference<mirror::Object> and int32_t " 7731 "have different sizes."); 7732 7733 // Slow path marking the GC root `root`. 7734 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86( 7735 instruction, root, /* unpoison_ref_before_marking= */ false); 7736 codegen_->AddSlowPath(slow_path); 7737 7738 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
7739 const int32_t entry_point_offset = 7740 Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg()); 7741 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0)); 7742 // The entrypoint is null when the GC is not marking. 7743 __ j(kNotEqual, slow_path->GetEntryLabel()); 7744 __ Bind(slow_path->GetExitLabel()); 7745 } else { 7746 // GC root loaded through a slow path for read barriers other 7747 // than Baker's. 7748 // /* GcRoot<mirror::Object>* */ root = address 7749 __ leal(root_reg, address); 7750 if (fixup_label != nullptr) { 7751 __ Bind(fixup_label); 7752 } 7753 // /* mirror::Object* */ root = root->Read() 7754 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); 7755 } 7756 } else { 7757 // Plain GC root load with no read barrier. 7758 // /* GcRoot<mirror::Object> */ root = *address 7759 __ movl(root_reg, address); 7760 if (fixup_label != nullptr) { 7761 __ Bind(fixup_label); 7762 } 7763 // Note that GC roots are not affected by heap poisoning, thus we 7764 // do not have to unpoison `root_reg` here. 7765 } 7766 } 7767 7768 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 7769 Location ref, 7770 Register obj, 7771 uint32_t offset, 7772 bool needs_null_check) { 7773 DCHECK(kEmitCompilerReadBarrier); 7774 DCHECK(kUseBakerReadBarrier); 7775 7776 // /* HeapReference<Object> */ ref = *(obj + offset) 7777 Address src(obj, offset); 7778 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 7779 } 7780 7781 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, 7782 Location ref, 7783 Register obj, 7784 uint32_t data_offset, 7785 Location index, 7786 bool needs_null_check) { 7787 DCHECK(kEmitCompilerReadBarrier); 7788 DCHECK(kUseBakerReadBarrier); 7789 7790 static_assert( 7791 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 7792 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 7793 // /* HeapReference<Object> */ ref = 7794 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 7795 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset); 7796 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 7797 } 7798 7799 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, 7800 Location ref, 7801 Register obj, 7802 const Address& src, 7803 bool needs_null_check, 7804 bool always_update_field, 7805 Register* temp) { 7806 DCHECK(kEmitCompilerReadBarrier); 7807 DCHECK(kUseBakerReadBarrier); 7808 7809 // In slow path based read barriers, the read barrier call is 7810 // inserted after the original load. However, in fast path based 7811 // Baker's read barriers, we need to perform the load of 7812 // mirror::Object::monitor_ *before* the original reference load. 7813 // This load-load ordering is required by the read barrier. 7814 // The fast path/slow path (for Baker's algorithm) should look like: 7815 // 7816 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); 7817 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 7818 // HeapReference<Object> ref = *src; // Original reference load. 7819 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 7820 // if (is_gray) { 7821 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. 
7822 // } 7823 // 7824 // Note: the original implementation in ReadBarrier::Barrier is 7825 // slightly more complex as: 7826 // - it implements the load-load fence using a data dependency on 7827 // the high-bits of rb_state, which are expected to be all zeroes 7828 // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here, 7829 // which is a no-op thanks to the x86 memory model); 7830 // - it performs additional checks that we do not do here for 7831 // performance reasons. 7832 7833 Register ref_reg = ref.AsRegister<Register>(); 7834 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 7835 7836 // Given the numeric representation, it's enough to check the low bit of the rb_state. 7837 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); 7838 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 7839 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; 7840 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; 7841 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); 7842 7843 // if (rb_state == ReadBarrier::GrayState()) 7844 // ref = ReadBarrier::Mark(ref); 7845 // At this point, just do the "if" and make sure that flags are preserved until the branch. 7846 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); 7847 if (needs_null_check) { 7848 MaybeRecordImplicitNullCheck(instruction); 7849 } 7850 7851 // Load fence to prevent load-load reordering. 7852 // Note that this is a no-op, thanks to the x86 memory model. 7853 GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 7854 7855 // The actual reference load. 7856 // /* HeapReference<Object> */ ref = *src 7857 __ movl(ref_reg, src); // Flags are unaffected. 7858 7859 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. 7860 // Slow path marking the object `ref` when it is gray. 7861 SlowPathCode* slow_path; 7862 if (always_update_field) { 7863 DCHECK(temp != nullptr); 7864 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86( 7865 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp); 7866 } else { 7867 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86( 7868 instruction, ref, /* unpoison_ref_before_marking= */ true); 7869 } 7870 AddSlowPath(slow_path); 7871 7872 // We have done the "if" of the gray bit check above, now branch based on the flags. 7873 __ j(kNotZero, slow_path->GetEntryLabel()); 7874 7875 // Object* ref = ref_addr->AsMirrorPtr() 7876 __ MaybeUnpoisonHeapReference(ref_reg); 7877 7878 __ Bind(slow_path->GetExitLabel()); 7879 } 7880 7881 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction, 7882 Location out, 7883 Location ref, 7884 Location obj, 7885 uint32_t offset, 7886 Location index) { 7887 DCHECK(kEmitCompilerReadBarrier); 7888 7889 // Insert a slow path based read barrier *after* the reference load. 7890 // 7891 // If heap poisoning is enabled, the unpoisoning of the loaded 7892 // reference will be carried out by the runtime within the slow 7893 // path. 7894 // 7895 // Note that `ref` currently does not get unpoisoned (when heap 7896 // poisoning is enabled), which is alright as the `ref` argument is 7897 // not used by the artReadBarrierSlow entry point. 7898 // 7899 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 
7900 SlowPathCode* slow_path = new (GetScopedAllocator()) 7901 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index); 7902 AddSlowPath(slow_path); 7903 7904 __ jmp(slow_path->GetEntryLabel()); 7905 __ Bind(slow_path->GetExitLabel()); 7906 } 7907 7908 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, 7909 Location out, 7910 Location ref, 7911 Location obj, 7912 uint32_t offset, 7913 Location index) { 7914 if (kEmitCompilerReadBarrier) { 7915 // Baker's read barriers shall be handled by the fast path 7916 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier). 7917 DCHECK(!kUseBakerReadBarrier); 7918 // If heap poisoning is enabled, unpoisoning will be taken care of 7919 // by the runtime within the slow path. 7920 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); 7921 } else if (kPoisonHeapReferences) { 7922 __ UnpoisonHeapReference(out.AsRegister<Register>()); 7923 } 7924 } 7925 7926 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, 7927 Location out, 7928 Location root) { 7929 DCHECK(kEmitCompilerReadBarrier); 7930 7931 // Insert a slow path based read barrier *after* the GC root load. 7932 // 7933 // Note that GC roots are not affected by heap poisoning, so we do 7934 // not need to do anything special for this here. 7935 SlowPathCode* slow_path = 7936 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root); 7937 AddSlowPath(slow_path); 7938 7939 __ jmp(slow_path->GetEntryLabel()); 7940 __ Bind(slow_path->GetExitLabel()); 7941 } 7942 7943 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 7944 // Nothing to do, this should be removed during prepare for register allocator. 7945 LOG(FATAL) << "Unreachable"; 7946 } 7947 7948 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 7949 // Nothing to do, this should be removed during prepare for register allocator. 7950 LOG(FATAL) << "Unreachable"; 7951 } 7952 7953 // Simple implementation of packed switch - generate cascaded compare/jumps. 7954 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { 7955 LocationSummary* locations = 7956 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); 7957 locations->SetInAt(0, Location::RequiresRegister()); 7958 } 7959 7960 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg, 7961 int32_t lower_bound, 7962 uint32_t num_entries, 7963 HBasicBlock* switch_block, 7964 HBasicBlock* default_block) { 7965 // Figure out the correct compare values and jump conditions. 7966 // Handle the first compare/branch as a special case because it might 7967 // jump to the default case. 7968 DCHECK_GT(num_entries, 2u); 7969 Condition first_condition; 7970 uint32_t index; 7971 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); 7972 if (lower_bound != 0) { 7973 first_condition = kLess; 7974 __ cmpl(value_reg, Immediate(lower_bound)); 7975 __ j(first_condition, codegen_->GetLabelOf(default_block)); 7976 __ j(kEqual, codegen_->GetLabelOf(successors[0])); 7977 7978 index = 1; 7979 } else { 7980 // Handle all the compare/jumps below. 7981 first_condition = kBelow; 7982 index = 0; 7983 } 7984 7985 // Handle the rest of the compare/jumps. 
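  // Each iteration below handles two table entries with a single compare against
  // case value (index + 1): the `first_condition` (less/below) branch dispatches
  // case `index`, and the `equal` branch dispatches case `index + 1`.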
7986 for (; index + 1 < num_entries; index += 2) { 7987 int32_t compare_to_value = lower_bound + index + 1; 7988 __ cmpl(value_reg, Immediate(compare_to_value)); 7989 // Jump to successors[index] if value < case_value[index]. 7990 __ j(first_condition, codegen_->GetLabelOf(successors[index])); 7991 // Jump to successors[index + 1] if value == case_value[index + 1]. 7992 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); 7993 } 7994 7995 if (index != num_entries) { 7996 // There are an odd number of entries. Handle the last one. 7997 DCHECK_EQ(index + 1, num_entries); 7998 __ cmpl(value_reg, Immediate(lower_bound + index)); 7999 __ j(kEqual, codegen_->GetLabelOf(successors[index])); 8000 } 8001 8002 // And the default for any other value. 8003 if (!codegen_->GoesToNextBlock(switch_block, default_block)) { 8004 __ jmp(codegen_->GetLabelOf(default_block)); 8005 } 8006 } 8007 8008 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { 8009 int32_t lower_bound = switch_instr->GetStartValue(); 8010 uint32_t num_entries = switch_instr->GetNumEntries(); 8011 LocationSummary* locations = switch_instr->GetLocations(); 8012 Register value_reg = locations->InAt(0).AsRegister<Register>(); 8013 8014 GenPackedSwitchWithCompares(value_reg, 8015 lower_bound, 8016 num_entries, 8017 switch_instr->GetBlock(), 8018 switch_instr->GetDefaultBlock()); 8019 } 8020 8021 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { 8022 LocationSummary* locations = 8023 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); 8024 locations->SetInAt(0, Location::RequiresRegister()); 8025 8026 // Constant area pointer. 8027 locations->SetInAt(1, Location::RequiresRegister()); 8028 8029 // And the temporary we need. 8030 locations->AddTemp(Location::RequiresRegister()); 8031 } 8032 8033 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { 8034 int32_t lower_bound = switch_instr->GetStartValue(); 8035 uint32_t num_entries = switch_instr->GetNumEntries(); 8036 LocationSummary* locations = switch_instr->GetLocations(); 8037 Register value_reg = locations->InAt(0).AsRegister<Register>(); 8038 HBasicBlock* default_block = switch_instr->GetDefaultBlock(); 8039 8040 if (num_entries <= kPackedSwitchJumpTableThreshold) { 8041 GenPackedSwitchWithCompares(value_reg, 8042 lower_bound, 8043 num_entries, 8044 switch_instr->GetBlock(), 8045 default_block); 8046 return; 8047 } 8048 8049 // Optimizing has a jump area. 8050 Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); 8051 Register constant_area = locations->InAt(1).AsRegister<Register>(); 8052 8053 // Remove the bias, if needed. 8054 if (lower_bound != 0) { 8055 __ leal(temp_reg, Address(value_reg, -lower_bound)); 8056 value_reg = temp_reg; 8057 } 8058 8059 // Is the value in range? 8060 DCHECK_GE(num_entries, 1u); 8061 __ cmpl(value_reg, Immediate(num_entries - 1)); 8062 __ j(kAbove, codegen_->GetLabelOf(default_block)); 8063 8064 // We are in the range of the table. 8065 // Load (target-constant_area) from the jump table, indexing by the value. 8066 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg)); 8067 8068 // Compute the actual target address by adding in constant_area. 8069 __ addl(temp_reg, constant_area); 8070 8071 // And jump. 
8072 __ jmp(temp_reg); 8073 } 8074 8075 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress( 8076 HX86ComputeBaseMethodAddress* insn) { 8077 LocationSummary* locations = 8078 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall); 8079 locations->SetOut(Location::RequiresRegister()); 8080 } 8081 8082 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress( 8083 HX86ComputeBaseMethodAddress* insn) { 8084 LocationSummary* locations = insn->GetLocations(); 8085 Register reg = locations->Out().AsRegister<Register>(); 8086 8087 // Generate call to next instruction. 8088 Label next_instruction; 8089 __ call(&next_instruction); 8090 __ Bind(&next_instruction); 8091 8092 // Remember this offset for later use with constant area. 8093 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize()); 8094 8095 // Grab the return address off the stack. 8096 __ popl(reg); 8097 } 8098 8099 void LocationsBuilderX86::VisitX86LoadFromConstantTable( 8100 HX86LoadFromConstantTable* insn) { 8101 LocationSummary* locations = 8102 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall); 8103 8104 locations->SetInAt(0, Location::RequiresRegister()); 8105 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant())); 8106 8107 // If we don't need to be materialized, we only need the inputs to be set. 8108 if (insn->IsEmittedAtUseSite()) { 8109 return; 8110 } 8111 8112 switch (insn->GetType()) { 8113 case DataType::Type::kFloat32: 8114 case DataType::Type::kFloat64: 8115 locations->SetOut(Location::RequiresFpuRegister()); 8116 break; 8117 8118 case DataType::Type::kInt32: 8119 locations->SetOut(Location::RequiresRegister()); 8120 break; 8121 8122 default: 8123 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType(); 8124 } 8125 } 8126 8127 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) { 8128 if (insn->IsEmittedAtUseSite()) { 8129 return; 8130 } 8131 8132 LocationSummary* locations = insn->GetLocations(); 8133 Location out = locations->Out(); 8134 Register const_area = locations->InAt(0).AsRegister<Register>(); 8135 HConstant *value = insn->GetConstant(); 8136 8137 switch (insn->GetType()) { 8138 case DataType::Type::kFloat32: 8139 __ movss(out.AsFpuRegister<XmmRegister>(), 8140 codegen_->LiteralFloatAddress( 8141 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); 8142 break; 8143 8144 case DataType::Type::kFloat64: 8145 __ movsd(out.AsFpuRegister<XmmRegister>(), 8146 codegen_->LiteralDoubleAddress( 8147 value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); 8148 break; 8149 8150 case DataType::Type::kInt32: 8151 __ movl(out.AsRegister<Register>(), 8152 codegen_->LiteralInt32Address( 8153 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); 8154 break; 8155 8156 default: 8157 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType(); 8158 } 8159 } 8160 8161 /** 8162 * Class to handle late fixup of offsets into constant area. 
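 * The fixed-up instruction is emitted with a kDummy32BitOffset displacement; once the
 * constant area has been placed, Process() rewrites that 32-bit displacement to
 * (constant area start + offset into the constant area) minus the code offset recorded
 * for the associated HX86ComputeBaseMethodAddress.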
8163 */ 8164 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { 8165 public: 8166 RIPFixup(CodeGeneratorX86& codegen, 8167 HX86ComputeBaseMethodAddress* base_method_address, 8168 size_t offset) 8169 : codegen_(&codegen), 8170 base_method_address_(base_method_address), 8171 offset_into_constant_area_(offset) {} 8172 8173 protected: 8174 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; } 8175 8176 CodeGeneratorX86* codegen_; 8177 HX86ComputeBaseMethodAddress* base_method_address_; 8178 8179 private: 8180 void Process(const MemoryRegion& region, int pos) override { 8181 // Patch the correct offset for the instruction. The place to patch is the 8182 // last 4 bytes of the instruction. 8183 // The value to patch is the distance from the offset in the constant area 8184 // from the address computed by the HX86ComputeBaseMethodAddress instruction. 8185 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_; 8186 int32_t relative_position = 8187 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_); 8188 8189 // Patch in the right value. 8190 region.StoreUnaligned<int32_t>(pos - 4, relative_position); 8191 } 8192 8193 // Location in constant area that the fixup refers to. 8194 int32_t offset_into_constant_area_; 8195 }; 8196 8197 /** 8198 * Class to handle late fixup of offsets to a jump table that will be created in the 8199 * constant area. 8200 */ 8201 class JumpTableRIPFixup : public RIPFixup { 8202 public: 8203 JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr) 8204 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)), 8205 switch_instr_(switch_instr) {} 8206 8207 void CreateJumpTable() { 8208 X86Assembler* assembler = codegen_->GetAssembler(); 8209 8210 // Ensure that the reference to the jump table has the correct offset. 8211 const int32_t offset_in_constant_table = assembler->ConstantAreaSize(); 8212 SetOffset(offset_in_constant_table); 8213 8214 // The label values in the jump table are computed relative to the 8215 // instruction addressing the constant area. 8216 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_); 8217 8218 // Populate the jump table with the correct values for the jump table. 8219 int32_t num_entries = switch_instr_->GetNumEntries(); 8220 HBasicBlock* block = switch_instr_->GetBlock(); 8221 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors(); 8222 // The value that we want is the target offset - the position of the table. 8223 for (int32_t i = 0; i < num_entries; i++) { 8224 HBasicBlock* b = successors[i]; 8225 Label* l = codegen_->GetLabelOf(b); 8226 DCHECK(l->IsBound()); 8227 int32_t offset_to_block = l->Position() - relative_offset; 8228 assembler->AppendInt32(offset_to_block); 8229 } 8230 } 8231 8232 private: 8233 const HX86PackedSwitch* switch_instr_; 8234 }; 8235 8236 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { 8237 // Generate the constant area if needed. 8238 X86Assembler* assembler = GetAssembler(); 8239 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { 8240 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 8241 // byte values. 8242 assembler->Align(4, 0); 8243 constant_area_start_ = assembler->CodeSize(); 8244 8245 // Populate any jump tables. 
8246 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) { 8247 jump_table->CreateJumpTable(); 8248 } 8249 8250 // And now add the constant area to the generated code. 8251 assembler->AddConstantArea(); 8252 } 8253 8254 // And finish up. 8255 CodeGenerator::Finalize(allocator); 8256 } 8257 8258 Address CodeGeneratorX86::LiteralDoubleAddress(double v, 8259 HX86ComputeBaseMethodAddress* method_base, 8260 Register reg) { 8261 AssemblerFixup* fixup = 8262 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v)); 8263 return Address(reg, kDummy32BitOffset, fixup); 8264 } 8265 8266 Address CodeGeneratorX86::LiteralFloatAddress(float v, 8267 HX86ComputeBaseMethodAddress* method_base, 8268 Register reg) { 8269 AssemblerFixup* fixup = 8270 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v)); 8271 return Address(reg, kDummy32BitOffset, fixup); 8272 } 8273 8274 Address CodeGeneratorX86::LiteralInt32Address(int32_t v, 8275 HX86ComputeBaseMethodAddress* method_base, 8276 Register reg) { 8277 AssemblerFixup* fixup = 8278 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v)); 8279 return Address(reg, kDummy32BitOffset, fixup); 8280 } 8281 8282 Address CodeGeneratorX86::LiteralInt64Address(int64_t v, 8283 HX86ComputeBaseMethodAddress* method_base, 8284 Register reg) { 8285 AssemblerFixup* fixup = 8286 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v)); 8287 return Address(reg, kDummy32BitOffset, fixup); 8288 } 8289 8290 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) { 8291 if (value == 0) { 8292 __ xorl(dest, dest); 8293 } else { 8294 __ movl(dest, Immediate(value)); 8295 } 8296 } 8297 8298 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) { 8299 if (value == 0) { 8300 __ testl(dest, dest); 8301 } else { 8302 __ cmpl(dest, Immediate(value)); 8303 } 8304 } 8305 8306 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) { 8307 Register lhs_reg = lhs.AsRegister<Register>(); 8308 GenerateIntCompare(lhs_reg, rhs); 8309 } 8310 8311 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) { 8312 if (rhs.IsConstant()) { 8313 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); 8314 Compare32BitValue(lhs, value); 8315 } else if (rhs.IsStackSlot()) { 8316 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex())); 8317 } else { 8318 __ cmpl(lhs, rhs.AsRegister<Register>()); 8319 } 8320 } 8321 8322 Address CodeGeneratorX86::ArrayAddress(Register obj, 8323 Location index, 8324 ScaleFactor scale, 8325 uint32_t data_offset) { 8326 return index.IsConstant() ? 8327 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : 8328 Address(obj, index.AsRegister<Register>(), scale, data_offset); 8329 } 8330 8331 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, 8332 Register reg, 8333 Register value) { 8334 // Create a fixup to be used to create and address the jump table. 8335 JumpTableRIPFixup* table_fixup = 8336 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr); 8337 8338 // We have to populate the jump tables. 8339 fixups_to_jump_tables_.push_back(table_fixup); 8340 8341 // We want a scaled address, as we are extracting the correct offset from the table. 8342 return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup); 8343 } 8344 8345 // TODO: target as memory. 
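// Moves the value a call left in the return register(s) into `target` via the parallel
// move resolver; 64-bit values are handled as two 32-bit moves since x86 returns them
// in a register pair.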
8346 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) { 8347 if (!target.IsValid()) { 8348 DCHECK_EQ(type, DataType::Type::kVoid); 8349 return; 8350 } 8351 8352 DCHECK_NE(type, DataType::Type::kVoid); 8353 8354 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type); 8355 if (target.Equals(return_loc)) { 8356 return; 8357 } 8358 8359 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged 8360 // with the else branch. 8361 if (type == DataType::Type::kInt64) { 8362 HParallelMove parallel_move(GetGraph()->GetAllocator()); 8363 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr); 8364 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr); 8365 GetMoveResolver()->EmitNativeCode(¶llel_move); 8366 } else { 8367 // Let the parallel move resolver take care of all of this. 8368 HParallelMove parallel_move(GetGraph()->GetAllocator()); 8369 parallel_move.AddMove(return_loc, target, type, nullptr); 8370 GetMoveResolver()->EmitNativeCode(¶llel_move); 8371 } 8372 } 8373 8374 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, 8375 const uint8_t* roots_data, 8376 const PatchInfo<Label>& info, 8377 uint64_t index_in_table) const { 8378 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; 8379 uintptr_t address = 8380 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); 8381 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; 8382 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = 8383 dchecked_integral_cast<uint32_t>(address); 8384 } 8385 8386 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { 8387 for (const PatchInfo<Label>& info : jit_string_patches_) { 8388 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index)); 8389 uint64_t index_in_table = GetJitStringRootIndex(string_reference); 8390 PatchJitRootUse(code, roots_data, info, index_in_table); 8391 } 8392 8393 for (const PatchInfo<Label>& info : jit_class_patches_) { 8394 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index)); 8395 uint64_t index_in_table = GetJitClassRootIndex(type_reference); 8396 PatchJitRootUse(code, roots_data, info, index_in_table); 8397 } 8398 } 8399 8400 void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction 8401 ATTRIBUTE_UNUSED) { 8402 LOG(FATAL) << "Unreachable"; 8403 } 8404 8405 void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction 8406 ATTRIBUTE_UNUSED) { 8407 LOG(FATAL) << "Unreachable"; 8408 } 8409 8410 #undef __ 8411 8412 } // namespace x86 8413 } // namespace art 8414