      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_x86_64.h"
     18 
     19 #include "art_method.h"
     20 #include "code_generator_utils.h"
     21 #include "compiled_method.h"
     22 #include "entrypoints/quick/quick_entrypoints.h"
     23 #include "gc/accounting/card_table.h"
     24 #include "intrinsics.h"
     25 #include "intrinsics_x86_64.h"
     26 #include "lock_word.h"
     27 #include "mirror/array-inl.h"
     28 #include "mirror/class-inl.h"
     29 #include "mirror/object_reference.h"
     30 #include "thread.h"
     31 #include "utils/assembler.h"
     32 #include "utils/stack_checks.h"
     33 #include "utils/x86_64/assembler_x86_64.h"
     34 #include "utils/x86_64/managed_register_x86_64.h"
     35 
     36 namespace art {
     37 
     38 template<class MirrorType>
     39 class GcRoot;
     40 
     41 namespace x86_64 {
     42 
     43 static constexpr int kCurrentMethodStackOffset = 0;
     44 static constexpr Register kMethodRegisterArgument = RDI;
      45 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
      46 // table version generates 7 instructions and num_entries literals. The compare/jump
      47 // sequence generates less code/data with a small num_entries.
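         // For example, with num_entries == 4 the compare/jump sequence costs roughly six
         // instructions and no data, whereas a jump table would cost 7 instructions plus 4 table
         // entries, which is why small switches below the threshold use the compare/jump form.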
     48 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
     49 
     50 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
     51 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
     52 
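         // 0x400 is the C2 flag (bit 10) of the x87 FPU status word; fprem leaves it set while
         // the partial-remainder reduction is still incomplete, and this mask is used when
         // testing the status word after fprem.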
     53 static constexpr int kC2ConditionMask = 0x400;
     54 
     55 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
     56 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
     57 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
     58 
     59 class NullCheckSlowPathX86_64 : public SlowPathCode {
     60  public:
     61   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
     62 
     63   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     64     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     65     __ Bind(GetEntryLabel());
     66     if (instruction_->CanThrowIntoCatchBlock()) {
     67       // Live registers will be restored in the catch block if caught.
     68       SaveLiveRegisters(codegen, instruction_->GetLocations());
     69     }
     70     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
     71                                   instruction_,
     72                                   instruction_->GetDexPc(),
     73                                   this);
     74     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
     75   }
     76 
     77   bool IsFatal() const OVERRIDE { return true; }
     78 
     79   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
     80 
     81  private:
     82   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
     83 };
     84 
     85 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
     86  public:
     87   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
     88 
     89   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     90     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     91     __ Bind(GetEntryLabel());
     92     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
     93     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
     94   }
     95 
     96   bool IsFatal() const OVERRIDE { return true; }
     97 
     98   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
     99 
    100  private:
    101   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
    102 };
    103 
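         // Slow path for integer and long div/rem when the divisor is -1.  The hardware idiv
         // instruction raises a fault for kMinInt / -1 (and kMinLong / -1), so the result is
         // computed directly: the quotient is the negated dividend and the remainder is zero.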
    104 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
    105  public:
    106   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
    107       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
    108 
    109   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    110     __ Bind(GetEntryLabel());
    111     if (type_ == Primitive::kPrimInt) {
    112       if (is_div_) {
    113         __ negl(cpu_reg_);
    114       } else {
    115         __ xorl(cpu_reg_, cpu_reg_);
    116       }
    117 
    118     } else {
    119       DCHECK_EQ(Primitive::kPrimLong, type_);
    120       if (is_div_) {
    121         __ negq(cpu_reg_);
    122       } else {
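                 // Writes to 32-bit registers zero-extend to 64 bits on x86-64, so xorl clears
                 // the full register even in the long case.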
    123         __ xorl(cpu_reg_, cpu_reg_);
    124       }
    125     }
    126     __ jmp(GetExitLabel());
    127   }
    128 
    129   const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
    130 
    131  private:
    132   const CpuRegister cpu_reg_;
    133   const Primitive::Type type_;
    134   const bool is_div_;
    135   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
    136 };
    137 
    138 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
    139  public:
    140   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
    141       : SlowPathCode(instruction), successor_(successor) {}
    142 
    143   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    144     LocationSummary* locations = instruction_->GetLocations();
    145     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    146     __ Bind(GetEntryLabel());
    147     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    148     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    149     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    150     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    151     if (successor_ == nullptr) {
    152       __ jmp(GetReturnLabel());
    153     } else {
    154       __ jmp(x86_64_codegen->GetLabelOf(successor_));
    155     }
    156   }
    157 
    158   Label* GetReturnLabel() {
    159     DCHECK(successor_ == nullptr);
    160     return &return_label_;
    161   }
    162 
    163   HBasicBlock* GetSuccessor() const {
    164     return successor_;
    165   }
    166 
    167   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
    168 
    169  private:
    170   HBasicBlock* const successor_;
    171   Label return_label_;
    172 
    173   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
    174 };
    175 
    176 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
    177  public:
    178   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
    179     : SlowPathCode(instruction) {}
    180 
    181   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    182     LocationSummary* locations = instruction_->GetLocations();
    183     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    184     __ Bind(GetEntryLabel());
    185     if (instruction_->CanThrowIntoCatchBlock()) {
    186       // Live registers will be restored in the catch block if caught.
    187       SaveLiveRegisters(codegen, instruction_->GetLocations());
    188     }
    189     // Are we using an array length from memory?
    190     HInstruction* array_length = instruction_->InputAt(1);
    191     Location length_loc = locations->InAt(1);
    192     InvokeRuntimeCallingConvention calling_convention;
    193     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
    194       // Load the array length into our temporary.
    195       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
    196       Location array_loc = array_length->GetLocations()->InAt(0);
    197       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
    198       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
    199       // Check for conflicts with index.
    200       if (length_loc.Equals(locations->InAt(0))) {
    201         // We know we aren't using parameter 2.
    202         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
    203       }
    204       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
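               // With string compression, the count field packs the character count above a
               // compression flag in the least significant bit; shift the flag out so the
               // entrypoint receives the actual length.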
    205       if (mirror::kUseStringCompression) {
    206         __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
    207       }
    208     }
    209 
    210     // We're moving two locations to locations that could overlap, so we need a parallel
    211     // move resolver.
    212     codegen->EmitParallelMoves(
    213         locations->InAt(0),
    214         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    215         Primitive::kPrimInt,
    216         length_loc,
    217         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    218         Primitive::kPrimInt);
    219     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
    220         ? kQuickThrowStringBounds
    221         : kQuickThrowArrayBounds;
    222     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    223     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    224     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
    225   }
    226 
    227   bool IsFatal() const OVERRIDE { return true; }
    228 
    229   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
    230 
    231  private:
    232   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
    233 };
    234 
    235 class LoadClassSlowPathX86_64 : public SlowPathCode {
    236  public:
    237   LoadClassSlowPathX86_64(HLoadClass* cls,
    238                           HInstruction* at,
    239                           uint32_t dex_pc,
    240                           bool do_clinit)
    241       : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    242     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    243   }
    244 
    245   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    246     LocationSummary* locations = instruction_->GetLocations();
    247     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    248     __ Bind(GetEntryLabel());
    249 
    250     SaveLiveRegisters(codegen, locations);
    251 
    252     // Custom calling convention: RAX serves as both input and output.
    253     __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
    254     x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
    255                                   instruction_,
    256                                   dex_pc_,
    257                                   this);
    258     if (do_clinit_) {
    259       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    260     } else {
    261       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    262     }
    263 
    264     Location out = locations->Out();
    265     // Move the class to the desired location.
    266     if (out.IsValid()) {
    267       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
    268       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    269     }
    270 
    271     RestoreLiveRegisters(codegen, locations);
    272     // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
    273     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
    274     if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
    275       DCHECK(out.IsValid());
    276       __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
    277               locations->Out().AsRegister<CpuRegister>());
    278       Label* fixup_label = x86_64_codegen->NewTypeBssEntryPatch(cls_);
    279       __ Bind(fixup_label);
    280     }
    281     __ jmp(GetExitLabel());
    282   }
    283 
    284   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
    285 
    286  private:
    287   // The class this slow path will load.
    288   HLoadClass* const cls_;
    289 
     290   // The dex PC of `at` (the instruction requiring the class load).
    291   const uint32_t dex_pc_;
    292 
    293   // Whether to initialize the class.
    294   const bool do_clinit_;
    295 
    296   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
    297 };
    298 
    299 class LoadStringSlowPathX86_64 : public SlowPathCode {
    300  public:
    301   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
    302 
    303   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    304     LocationSummary* locations = instruction_->GetLocations();
    305     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    306 
    307     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    308     __ Bind(GetEntryLabel());
    309     SaveLiveRegisters(codegen, locations);
    310 
    311     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    312     // Custom calling convention: RAX serves as both input and output.
    313     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    314     x86_64_codegen->InvokeRuntime(kQuickResolveString,
    315                                   instruction_,
    316                                   instruction_->GetDexPc(),
    317                                   this);
    318     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    319     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    320     RestoreLiveRegisters(codegen, locations);
    321 
    322     // Store the resolved String to the BSS entry.
    323     __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
    324             locations->Out().AsRegister<CpuRegister>());
    325     Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
    326     __ Bind(fixup_label);
    327 
    328     __ jmp(GetExitLabel());
    329   }
    330 
    331   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
    332 
    333  private:
    334   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
    335 };
    336 
    337 class TypeCheckSlowPathX86_64 : public SlowPathCode {
    338  public:
    339   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
    340       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
    341 
    342   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    343     LocationSummary* locations = instruction_->GetLocations();
    344     uint32_t dex_pc = instruction_->GetDexPc();
    345     DCHECK(instruction_->IsCheckCast()
    346            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    347 
    348     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    349     __ Bind(GetEntryLabel());
    350 
    351     if (!is_fatal_) {
    352       SaveLiveRegisters(codegen, locations);
    353     }
    354 
    355     // We're moving two locations to locations that could overlap, so we need a parallel
    356     // move resolver.
    357     InvokeRuntimeCallingConvention calling_convention;
    358     codegen->EmitParallelMoves(locations->InAt(0),
    359                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    360                                Primitive::kPrimNot,
    361                                locations->InAt(1),
    362                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    363                                Primitive::kPrimNot);
    364     if (instruction_->IsInstanceOf()) {
    365       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
    366       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    367     } else {
    368       DCHECK(instruction_->IsCheckCast());
    369       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
    370       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    371     }
    372 
    373     if (!is_fatal_) {
    374       if (instruction_->IsInstanceOf()) {
    375         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    376       }
    377 
    378       RestoreLiveRegisters(codegen, locations);
    379       __ jmp(GetExitLabel());
    380     }
    381   }
    382 
    383   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
    384 
    385   bool IsFatal() const OVERRIDE { return is_fatal_; }
    386 
    387  private:
    388   const bool is_fatal_;
    389 
    390   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
    391 };
    392 
    393 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
    394  public:
    395   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
    396       : SlowPathCode(instruction) {}
    397 
    398   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    399     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    400     __ Bind(GetEntryLabel());
    401     LocationSummary* locations = instruction_->GetLocations();
    402     SaveLiveRegisters(codegen, locations);
    403     InvokeRuntimeCallingConvention calling_convention;
    404     x86_64_codegen->Load32BitValue(
    405         CpuRegister(calling_convention.GetRegisterAt(0)),
    406         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    407     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    408     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
    409   }
    410 
    411   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
    412 
    413  private:
    414   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
    415 };
    416 
    417 class ArraySetSlowPathX86_64 : public SlowPathCode {
    418  public:
    419   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
    420 
    421   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    422     LocationSummary* locations = instruction_->GetLocations();
    423     __ Bind(GetEntryLabel());
    424     SaveLiveRegisters(codegen, locations);
    425 
    426     InvokeRuntimeCallingConvention calling_convention;
    427     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    428     parallel_move.AddMove(
    429         locations->InAt(0),
    430         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    431         Primitive::kPrimNot,
    432         nullptr);
    433     parallel_move.AddMove(
    434         locations->InAt(1),
    435         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    436         Primitive::kPrimInt,
    437         nullptr);
    438     parallel_move.AddMove(
    439         locations->InAt(2),
    440         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
    441         Primitive::kPrimNot,
    442         nullptr);
    443     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    444 
    445     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    446     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    447     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    448     RestoreLiveRegisters(codegen, locations);
    449     __ jmp(GetExitLabel());
    450   }
    451 
    452   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
    453 
    454  private:
    455   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
    456 };
    457 
    458 // Slow path marking an object reference `ref` during a read
    459 // barrier. The field `obj.field` in the object `obj` holding this
    460 // reference does not get updated by this slow path after marking (see
    461 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
    462 //
    463 // This means that after the execution of this slow path, `ref` will
    464 // always be up-to-date, but `obj.field` may not; i.e., after the
    465 // flip, `ref` will be a to-space reference, but `obj.field` will
    466 // probably still be a from-space reference (unless it gets updated by
    467 // another thread, or if another thread installed another object
    468 // reference (different from `ref`) in `obj.field`).
    469 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
    470  public:
    471   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
    472                                 Location ref,
    473                                 bool unpoison_ref_before_marking)
    474       : SlowPathCode(instruction),
    475         ref_(ref),
    476         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    477     DCHECK(kEmitCompilerReadBarrier);
    478   }
    479 
    480   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
    481 
    482   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    483     LocationSummary* locations = instruction_->GetLocations();
    484     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    485     Register ref_reg = ref_cpu_reg.AsRegister();
    486     DCHECK(locations->CanCall());
    487     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    488     DCHECK(instruction_->IsInstanceFieldGet() ||
    489            instruction_->IsStaticFieldGet() ||
    490            instruction_->IsArrayGet() ||
    491            instruction_->IsArraySet() ||
    492            instruction_->IsLoadClass() ||
    493            instruction_->IsLoadString() ||
    494            instruction_->IsInstanceOf() ||
    495            instruction_->IsCheckCast() ||
    496            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
    497            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
    498         << "Unexpected instruction in read barrier marking slow path: "
    499         << instruction_->DebugName();
    500 
    501     __ Bind(GetEntryLabel());
    502     if (unpoison_ref_before_marking_) {
    503       // Object* ref = ref_addr->AsMirrorPtr()
    504       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    505     }
    506     // No need to save live registers; it's taken care of by the
    507     // entrypoint. Also, there is no need to update the stack mask,
    508     // as this runtime call will not trigger a garbage collection.
    509     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    510     DCHECK_NE(ref_reg, RSP);
    511     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    512     // "Compact" slow path, saving two moves.
    513     //
    514     // Instead of using the standard runtime calling convention (input
    515     // and output in R0):
    516     //
    517     //   RDI <- ref
    518     //   RAX <- ReadBarrierMark(RDI)
    519     //   ref <- RAX
    520     //
    521     // we just use rX (the register containing `ref`) as input and output
    522     // of a dedicated entrypoint:
    523     //
    524     //   rX <- ReadBarrierMarkRegX(rX)
    525     //
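             // There is one such entrypoint per general-purpose register
             // (pReadBarrierMarkRegN), so `ref_reg` directly selects the Thread
             // entrypoint slot to call through.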
    526     int32_t entry_point_offset =
    527         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    528     // This runtime call does not require a stack map.
    529     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    530     __ jmp(GetExitLabel());
    531   }
    532 
    533  private:
    534   // The location (register) of the marked object reference.
    535   const Location ref_;
    536   // Should the reference in `ref_` be unpoisoned prior to marking it?
    537   const bool unpoison_ref_before_marking_;
    538 
    539   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
    540 };
    541 
    542 // Slow path marking an object reference `ref` during a read barrier,
    543 // and if needed, atomically updating the field `obj.field` in the
    544 // object `obj` holding this reference after marking (contrary to
    545 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
    546 // `obj.field`).
    547 //
    548 // This means that after the execution of this slow path, both `ref`
    549 // and `obj.field` will be up-to-date; i.e., after the flip, both will
    550 // hold the same to-space reference (unless another thread installed
    551 // another object reference (different from `ref`) in `obj.field`).
    552 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
    553  public:
    554   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
    555                                               Location ref,
    556                                               CpuRegister obj,
    557                                               const Address& field_addr,
    558                                               bool unpoison_ref_before_marking,
    559                                               CpuRegister temp1,
    560                                               CpuRegister temp2)
    561       : SlowPathCode(instruction),
    562         ref_(ref),
    563         obj_(obj),
    564         field_addr_(field_addr),
    565         unpoison_ref_before_marking_(unpoison_ref_before_marking),
    566         temp1_(temp1),
    567         temp2_(temp2) {
    568     DCHECK(kEmitCompilerReadBarrier);
    569   }
    570 
    571   const char* GetDescription() const OVERRIDE {
    572     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
    573   }
    574 
    575   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    576     LocationSummary* locations = instruction_->GetLocations();
    577     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    578     Register ref_reg = ref_cpu_reg.AsRegister();
    579     DCHECK(locations->CanCall());
    580     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    581     // This slow path is only used by the UnsafeCASObject intrinsic.
    582     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
    583         << "Unexpected instruction in read barrier marking and field updating slow path: "
    584         << instruction_->DebugName();
    585     DCHECK(instruction_->GetLocations()->Intrinsified());
    586     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
    587 
    588     __ Bind(GetEntryLabel());
    589     if (unpoison_ref_before_marking_) {
    590       // Object* ref = ref_addr->AsMirrorPtr()
    591       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    592     }
    593 
    594     // Save the old (unpoisoned) reference.
    595     __ movl(temp1_, ref_cpu_reg);
    596 
    597     // No need to save live registers; it's taken care of by the
    598     // entrypoint. Also, there is no need to update the stack mask,
    599     // as this runtime call will not trigger a garbage collection.
    600     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    601     DCHECK_NE(ref_reg, RSP);
    602     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    603     // "Compact" slow path, saving two moves.
    604     //
     605     // Instead of using the standard runtime calling convention (input
     606     // in RDI and output in RAX):
    607     //
    608     //   RDI <- ref
    609     //   RAX <- ReadBarrierMark(RDI)
    610     //   ref <- RAX
    611     //
    612     // we just use rX (the register containing `ref`) as input and output
    613     // of a dedicated entrypoint:
    614     //
    615     //   rX <- ReadBarrierMarkRegX(rX)
    616     //
    617     int32_t entry_point_offset =
    618         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    619     // This runtime call does not require a stack map.
    620     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    621 
    622     // If the new reference is different from the old reference,
    623     // update the field in the holder (`*field_addr`).
    624     //
    625     // Note that this field could also hold a different object, if
    626     // another thread had concurrently changed it. In that case, the
    627     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    628     // operation below would abort the CAS, leaving the field as-is.
    629     NearLabel done;
    630     __ cmpl(temp1_, ref_cpu_reg);
    631     __ j(kEqual, &done);
    632 
     633     // Update the holder's field atomically.  This may fail if the
     634     // mutator updates it before we do, but that is OK.  This is achieved
    635     // using a strong compare-and-set (CAS) operation with relaxed
    636     // memory synchronization ordering, where the expected value is
    637     // the old reference and the desired value is the new reference.
     638     // This operation is implemented with a 32-bit LOCK CMPXCHG
    639     // instruction, which requires the expected value (the old
    640     // reference) to be in EAX.  Save RAX beforehand, and move the
    641     // expected value (stored in `temp1_`) into EAX.
    642     __ movq(temp2_, CpuRegister(RAX));
    643     __ movl(CpuRegister(RAX), temp1_);
    644 
    645     // Convenience aliases.
    646     CpuRegister base = obj_;
    647     CpuRegister expected = CpuRegister(RAX);
    648     CpuRegister value = ref_cpu_reg;
    649 
    650     bool base_equals_value = (base.AsRegister() == value.AsRegister());
    651     Register value_reg = ref_reg;
    652     if (kPoisonHeapReferences) {
    653       if (base_equals_value) {
    654         // If `base` and `value` are the same register location, move
    655         // `value_reg` to a temporary register.  This way, poisoning
    656         // `value_reg` won't invalidate `base`.
    657         value_reg = temp1_.AsRegister();
    658         __ movl(CpuRegister(value_reg), base);
    659       }
    660 
    661       // Check that the register allocator did not assign the location
    662       // of `expected` (RAX) to `value` nor to `base`, so that heap
    663       // poisoning (when enabled) works as intended below.
    664       // - If `value` were equal to `expected`, both references would
    665       //   be poisoned twice, meaning they would not be poisoned at
    666       //   all, as heap poisoning uses address negation.
    667       // - If `base` were equal to `expected`, poisoning `expected`
    668       //   would invalidate `base`.
    669       DCHECK_NE(value_reg, expected.AsRegister());
    670       DCHECK_NE(base.AsRegister(), expected.AsRegister());
    671 
    672       __ PoisonHeapReference(expected);
    673       __ PoisonHeapReference(CpuRegister(value_reg));
    674     }
    675 
    676     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
    677 
    678     // If heap poisoning is enabled, we need to unpoison the values
    679     // that were poisoned earlier.
    680     if (kPoisonHeapReferences) {
    681       if (base_equals_value) {
    682         // `value_reg` has been moved to a temporary register, no need
    683         // to unpoison it.
    684       } else {
    685         __ UnpoisonHeapReference(CpuRegister(value_reg));
    686       }
     687       // No need to unpoison `expected` (RAX), as it is overwritten below.
    688     }
    689 
    690     // Restore RAX.
    691     __ movq(CpuRegister(RAX), temp2_);
    692 
    693     __ Bind(&done);
    694     __ jmp(GetExitLabel());
    695   }
    696 
    697  private:
    698   // The location (register) of the marked object reference.
    699   const Location ref_;
    700   // The register containing the object holding the marked object reference field.
    701   const CpuRegister obj_;
    702   // The address of the marked reference field.  The base of this address must be `obj_`.
    703   const Address field_addr_;
    704 
    705   // Should the reference in `ref_` be unpoisoned prior to marking it?
    706   const bool unpoison_ref_before_marking_;
    707 
    708   const CpuRegister temp1_;
    709   const CpuRegister temp2_;
    710 
    711   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
    712 };
    713 
    714 // Slow path generating a read barrier for a heap reference.
    715 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
    716  public:
    717   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
    718                                             Location out,
    719                                             Location ref,
    720                                             Location obj,
    721                                             uint32_t offset,
    722                                             Location index)
    723       : SlowPathCode(instruction),
    724         out_(out),
    725         ref_(ref),
    726         obj_(obj),
    727         offset_(offset),
    728         index_(index) {
    729     DCHECK(kEmitCompilerReadBarrier);
    730     // If `obj` is equal to `out` or `ref`, it means the initial
    731     // object has been overwritten by (or after) the heap object
    732     // reference load to be instrumented, e.g.:
    733     //
    734     //   __ movl(out, Address(out, offset));
    735     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    736     //
    737     // In that case, we have lost the information about the original
    738     // object, and the emitted read barrier cannot work properly.
    739     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    740     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
     741   }
    742 
    743   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    744     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    745     LocationSummary* locations = instruction_->GetLocations();
    746     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    747     DCHECK(locations->CanCall());
    748     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    749     DCHECK(instruction_->IsInstanceFieldGet() ||
    750            instruction_->IsStaticFieldGet() ||
    751            instruction_->IsArrayGet() ||
    752            instruction_->IsInstanceOf() ||
    753            instruction_->IsCheckCast() ||
    754            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
    755         << "Unexpected instruction in read barrier for heap reference slow path: "
    756         << instruction_->DebugName();
    757 
    758     __ Bind(GetEntryLabel());
    759     SaveLiveRegisters(codegen, locations);
    760 
    761     // We may have to change the index's value, but as `index_` is a
    762     // constant member (like other "inputs" of this slow path),
    763     // introduce a copy of it, `index`.
    764     Location index = index_;
    765     if (index_.IsValid()) {
    766       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
    767       if (instruction_->IsArrayGet()) {
     768         // Compute the real offset and store it in the index register.
    769         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
    770         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
    771         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
    772           // We are about to change the value of `index_reg` (see the
    773           // calls to art::x86_64::X86_64Assembler::shll and
    774           // art::x86_64::X86_64Assembler::AddImmediate below), but it
    775           // has not been saved by the previous call to
    776           // art::SlowPathCode::SaveLiveRegisters, as it is a
    777           // callee-save register --
    778           // art::SlowPathCode::SaveLiveRegisters does not consider
    779           // callee-save registers, as it has been designed with the
    780           // assumption that callee-save registers are supposed to be
    781           // handled by the called function.  So, as a callee-save
    782           // register, `index_reg` _would_ eventually be saved onto
    783           // the stack, but it would be too late: we would have
    784           // changed its value earlier.  Therefore, we manually save
    785           // it here into another freely available register,
    786           // `free_reg`, chosen of course among the caller-save
    787           // registers (as a callee-save `free_reg` register would
    788           // exhibit the same problem).
    789           //
    790           // Note we could have requested a temporary register from
    791           // the register allocator instead; but we prefer not to, as
    792           // this is a slow path, and we know we can find a
    793           // caller-save register that is available.
    794           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
    795           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
    796           index_reg = free_reg;
    797           index = Location::RegisterLocation(index_reg);
    798         } else {
    799           // The initial register stored in `index_` has already been
    800           // saved in the call to art::SlowPathCode::SaveLiveRegisters
    801           // (as it is not a callee-save register), so we can freely
    802           // use it.
    803         }
     804         // Shifting the index value contained in `index_reg` left by the
     805         // reference scale factor (TIMES_4, i.e. 2 bits) cannot overflow in
     806         // practice, as the runtime is unable to allocate object arrays with
     807         // more than 2^26 - 1 elements (that is, 2^28 - 4 bytes).
    808         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
    809         static_assert(
    810             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
    811             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
    812         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
    813       } else {
    814         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
    815         // intrinsics, `index_` is not shifted by a scale factor of 2
    816         // (as in the case of ArrayGet), as it is actually an offset
    817         // to an object field within an object.
    818         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
    819         DCHECK(instruction_->GetLocations()->Intrinsified());
    820         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
    821                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
    822             << instruction_->AsInvoke()->GetIntrinsic();
    823         DCHECK_EQ(offset_, 0U);
    824         DCHECK(index_.IsRegister());
    825       }
    826     }
    827 
    828     // We're moving two or three locations to locations that could
    829     // overlap, so we need a parallel move resolver.
    830     InvokeRuntimeCallingConvention calling_convention;
    831     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    832     parallel_move.AddMove(ref_,
    833                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    834                           Primitive::kPrimNot,
    835                           nullptr);
    836     parallel_move.AddMove(obj_,
    837                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    838                           Primitive::kPrimNot,
    839                           nullptr);
    840     if (index.IsValid()) {
    841       parallel_move.AddMove(index,
    842                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
    843                             Primitive::kPrimInt,
    844                             nullptr);
    845       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    846     } else {
    847       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    848       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    849     }
    850     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
    851                                   instruction_,
    852                                   instruction_->GetDexPc(),
    853                                   this);
    854     CheckEntrypointTypes<
    855         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    856     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
    857 
    858     RestoreLiveRegisters(codegen, locations);
    859     __ jmp(GetExitLabel());
    860   }
    861 
    862   const char* GetDescription() const OVERRIDE {
    863     return "ReadBarrierForHeapReferenceSlowPathX86_64";
    864   }
    865 
    866  private:
    867   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    868     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    869     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    870     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
    871       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
    872         return static_cast<CpuRegister>(i);
    873       }
    874     }
    875     // We shall never fail to find a free caller-save register, as
    876     // there are more than two core caller-save registers on x86-64
    877     // (meaning it is possible to find one which is different from
    878     // `ref` and `obj`).
    879     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    880     LOG(FATAL) << "Could not find a free caller-save register";
    881     UNREACHABLE();
    882   }
    883 
    884   const Location out_;
    885   const Location ref_;
    886   const Location obj_;
    887   const uint32_t offset_;
    888   // An additional location containing an index to an array.
    889   // Only used for HArrayGet and the UnsafeGetObject &
    890   // UnsafeGetObjectVolatile intrinsics.
    891   const Location index_;
    892 
    893   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
    894 };
    895 
    896 // Slow path generating a read barrier for a GC root.
    897 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
    898  public:
    899   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
    900       : SlowPathCode(instruction), out_(out), root_(root) {
    901     DCHECK(kEmitCompilerReadBarrier);
    902   }
    903 
    904   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    905     LocationSummary* locations = instruction_->GetLocations();
    906     DCHECK(locations->CanCall());
    907     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    908     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
    909         << "Unexpected instruction in read barrier for GC root slow path: "
    910         << instruction_->DebugName();
    911 
    912     __ Bind(GetEntryLabel());
    913     SaveLiveRegisters(codegen, locations);
    914 
    915     InvokeRuntimeCallingConvention calling_convention;
    916     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    917     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    918     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
    919                                   instruction_,
    920                                   instruction_->GetDexPc(),
    921                                   this);
    922     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    923     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
    924 
    925     RestoreLiveRegisters(codegen, locations);
    926     __ jmp(GetExitLabel());
    927   }
    928 
    929   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
    930 
    931  private:
    932   const Location out_;
    933   const Location root_;
    934 
    935   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
    936 };
    937 
    938 #undef __
    939 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
    940 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
    941 
    942 inline Condition X86_64IntegerCondition(IfCondition cond) {
    943   switch (cond) {
    944     case kCondEQ: return kEqual;
    945     case kCondNE: return kNotEqual;
    946     case kCondLT: return kLess;
    947     case kCondLE: return kLessEqual;
    948     case kCondGT: return kGreater;
    949     case kCondGE: return kGreaterEqual;
    950     case kCondB:  return kBelow;
    951     case kCondBE: return kBelowEqual;
    952     case kCondA:  return kAbove;
    953     case kCondAE: return kAboveEqual;
    954   }
    955   LOG(FATAL) << "Unreachable";
    956   UNREACHABLE();
    957 }
    958 
    959 // Maps FP condition to x86_64 name.
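         // Note that ucomiss/ucomisd set the flags as for an unsigned comparison, so the
         // below/above family of condition codes encodes FP less-than/greater-than.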
    960 inline Condition X86_64FPCondition(IfCondition cond) {
    961   switch (cond) {
    962     case kCondEQ: return kEqual;
    963     case kCondNE: return kNotEqual;
    964     case kCondLT: return kBelow;
    965     case kCondLE: return kBelowEqual;
    966     case kCondGT: return kAbove;
    967     case kCondGE: return kAboveEqual;
    968     default:      break;  // should not happen
     969   }
    970   LOG(FATAL) << "Unreachable";
    971   UNREACHABLE();
    972 }
    973 
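         // On x86-64 every desired dispatch kind is supported, so the requested dispatch info
         // is returned unchanged.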
    974 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    975       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    976       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
    977   return desired_dispatch_info;
    978 }
    979 
    980 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    981     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
    982   // All registers are assumed to be correctly set up.
    983 
    984   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
    985   switch (invoke->GetMethodLoadKind()) {
    986     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
    987       // temp = thread->string_init_entrypoint
    988       uint32_t offset =
    989           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
    990       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
    991       break;
    992     }
    993     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
    994       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
    995       break;
    996     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
    997       DCHECK(GetCompilerOptions().IsBootImage());
    998       __ leal(temp.AsRegister<CpuRegister>(),
    999               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
   1000       RecordBootMethodPatch(invoke);
   1001       break;
   1002     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
   1003       Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
   1004       break;
   1005     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
   1006       __ movq(temp.AsRegister<CpuRegister>(),
   1007               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
    1008       // Bind a new fixup label at the end of the "movq" insn.
   1009       __ Bind(NewMethodBssEntryPatch(
   1010           MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())));
   1011       break;
   1012     }
   1013     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
   1014       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
   1015       return;  // No code pointer retrieval; the runtime performs the call directly.
   1016     }
   1017   }
   1018 
   1019   switch (invoke->GetCodePtrLocation()) {
   1020     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
   1021       __ call(&frame_entry_label_);
   1022       break;
   1023     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
   1024       // (callee_method + offset_of_quick_compiled_code)()
   1025       __ call(Address(callee_method.AsRegister<CpuRegister>(),
   1026                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
   1027                           kX86_64PointerSize).SizeValue()));
   1028       break;
   1029   }
   1030   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   1031 
   1032   DCHECK(!IsLeafMethod());
   1033 }
   1034 
   1035 void CodeGeneratorX86_64::GenerateVirtualCall(
   1036     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
   1037   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   1038   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
   1039       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
   1040 
   1041   // Use the calling convention instead of the location of the receiver, as
   1042   // intrinsics may have put the receiver in a different register. In the intrinsics
   1043   // slow path, the arguments have been moved to the right place, so here we are
   1044   // guaranteed that the receiver is the first register of the calling convention.
   1045   InvokeDexCallingConvention calling_convention;
   1046   Register receiver = calling_convention.GetRegisterAt(0);
   1047 
   1048   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
   1049   // /* HeapReference<Class> */ temp = receiver->klass_
   1050   __ movl(temp, Address(CpuRegister(receiver), class_offset));
   1051   MaybeRecordImplicitNullCheck(invoke);
   1052   // Instead of simply (possibly) unpoisoning `temp` here, we should
   1053   // emit a read barrier for the previous class reference load.
   1054   // However this is not required in practice, as this is an
   1055   // intermediate/temporary reference and because the current
   1056   // concurrent copying collector keeps the from-space memory
    1057   // intact/accessible until the end of the marking phase (future
    1058   // versions of the collector may not).
   1059   __ MaybeUnpoisonHeapReference(temp);
   1060   // temp = temp->GetMethodAt(method_offset);
   1061   __ movq(temp, Address(temp, method_offset));
   1062   // call temp->GetEntryPoint();
   1063   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
   1064       kX86_64PointerSize).SizeValue()));
   1065   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   1066 }
   1067 
   1068 void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
   1069   boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
   1070                                           invoke->GetTargetMethod().dex_method_index);
   1071   __ Bind(&boot_image_method_patches_.back().label);
   1072 }
   1073 
   1074 Label* CodeGeneratorX86_64::NewMethodBssEntryPatch(MethodReference target_method) {
   1075   // Add a patch entry and return the label.
   1076   method_bss_entry_patches_.emplace_back(*target_method.dex_file, target_method.dex_method_index);
   1077   return &method_bss_entry_patches_.back().label;
   1078 }
   1079 
   1080 void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) {
   1081   boot_image_type_patches_.emplace_back(load_class->GetDexFile(),
   1082                                         load_class->GetTypeIndex().index_);
   1083   __ Bind(&boot_image_type_patches_.back().label);
   1084 }
   1085 
   1086 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
   1087   type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
   1088   return &type_bss_entry_patches_.back().label;
   1089 }
   1090 
   1091 void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
   1092   DCHECK(GetCompilerOptions().IsBootImage());
   1093   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   1094   __ Bind(&string_patches_.back().label);
   1095 }
   1096 
   1097 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
   1098   DCHECK(!GetCompilerOptions().IsBootImage());
   1099   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   1100   return &string_patches_.back().label;
   1101 }
   1102 
    1103 // The label points to the end of the "movl" or another instruction, but the literal offset
    1104 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
   1105 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
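         // For instance, the patched instructions end with a 32-bit displacement or immediate,
         // so the patchable constant starts 4 bytes before the bound label.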
   1106 
   1107 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
   1108 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
   1109     const ArenaDeque<PatchInfo<Label>>& infos,
   1110     ArenaVector<LinkerPatch>* linker_patches) {
   1111   for (const PatchInfo<Label>& info : infos) {
   1112     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
   1113     linker_patches->push_back(
   1114         Factory(literal_offset, &info.dex_file, info.label.Position(), info.index));
   1115   }
   1116 }
   1117 
   1118 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   1119   DCHECK(linker_patches->empty());
   1120   size_t size =
   1121       boot_image_method_patches_.size() +
   1122       method_bss_entry_patches_.size() +
   1123       boot_image_type_patches_.size() +
   1124       type_bss_entry_patches_.size() +
   1125       string_patches_.size();
   1126   linker_patches->reserve(size);
   1127   if (GetCompilerOptions().IsBootImage()) {
   1128     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
   1129                                                                   linker_patches);
   1130     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
   1131                                                                 linker_patches);
   1132     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   1133   } else {
   1134     DCHECK(boot_image_method_patches_.empty());
   1135     DCHECK(boot_image_type_patches_.empty());
   1136     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   1137   }
   1138   EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
   1139                                                                 linker_patches);
   1140   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
   1141                                                               linker_patches);
   1142   DCHECK_EQ(size, linker_patches->size());
   1143 }
   1144 
   1145 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   1146   stream << Register(reg);
   1147 }
   1148 
   1149 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
   1150   stream << FloatRegister(reg);
   1151 }
   1152 
   1153 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   1154   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
   1155   return kX86_64WordSize;
   1156 }
   1157 
   1158 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
   1159   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
   1160   return kX86_64WordSize;
   1161 }
   1162 
   1163 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
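           // Graphs using SIMD need the full 128-bit XMM register preserved (movups);
           // otherwise spilling the low 64 bits (movsd) is sufficient.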
   1164   if (GetGraph()->HasSIMD()) {
   1165     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
   1166   } else {
   1167     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
   1168   }
   1169   return GetFloatingPointSpillSlotSize();
   1170 }
   1171 
   1172 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   1173   if (GetGraph()->HasSIMD()) {
   1174     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
   1175   } else {
   1176     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
   1177   }
   1178   return GetFloatingPointSpillSlotSize();
   1179 }
   1180 
   1181 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
   1182                                         HInstruction* instruction,
   1183                                         uint32_t dex_pc,
   1184                                         SlowPathCode* slow_path) {
   1185   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
   1186   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
   1187   if (EntrypointRequiresStackMap(entrypoint)) {
   1188     RecordPcInfo(instruction, dex_pc, slow_path);
   1189   }
   1190 }
   1191 
   1192 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
   1193                                                               HInstruction* instruction,
   1194                                                               SlowPathCode* slow_path) {
   1195   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
   1196   GenerateInvokeRuntime(entry_point_offset);
   1197 }
   1198 
   1199 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
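           // Quick entrypoints live at fixed offsets from the Thread object, which is
           // reachable through the GS segment register on x86-64, hence the gs-prefixed
           // absolute call.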
   1200   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
   1201 }
   1202 
   1203 static constexpr int kNumberOfCpuRegisterPairs = 0;
   1204 // Use a fake return address register to mimic Quick.
   1205 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
   1206 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
   1207                                          const X86_64InstructionSetFeatures& isa_features,
   1208                                          const CompilerOptions& compiler_options,
   1209                                          OptimizingCompilerStats* stats)
   1210       : CodeGenerator(graph,
   1211                       kNumberOfCpuRegisters,
   1212                       kNumberOfFloatRegisters,
   1213                       kNumberOfCpuRegisterPairs,
   1214                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
   1215                                           arraysize(kCoreCalleeSaves))
   1216                           | (1 << kFakeReturnRegister),
   1217                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
   1218                                           arraysize(kFpuCalleeSaves)),
   1219                       compiler_options,
   1220                       stats),
   1221         block_labels_(nullptr),
   1222         location_builder_(graph, this),
   1223         instruction_visitor_(graph, this),
   1224         move_resolver_(graph->GetArena(), this),
   1225         assembler_(graph->GetArena()),
   1226         isa_features_(isa_features),
   1227         constant_area_start_(0),
   1228         boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1229         method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1230         boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1231         type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1232         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1233         jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1234         jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1235         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   1236   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
   1237 }
   1238 
   1239 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
   1240                                                                CodeGeneratorX86_64* codegen)
   1241       : InstructionCodeGenerator(graph, codegen),
   1242         assembler_(codegen->GetAssembler()),
   1243         codegen_(codegen) {}
   1244 
   1245 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
   1246   // Stack register is always reserved.
   1247   blocked_core_registers_[RSP] = true;
   1248 
   1249   // Block the register used as TMP.
   1250   blocked_core_registers_[TMP] = true;
   1251 }
   1252 
   1253 static dwarf::Reg DWARFReg(Register reg) {
   1254   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
   1255 }
   1256 
   1257 static dwarf::Reg DWARFReg(FloatRegister reg) {
   1258   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
   1259 }
   1260 
   1261 void CodeGeneratorX86_64::GenerateFrameEntry() {
   1262   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
   1263   __ Bind(&frame_entry_label_);
   1264   bool skip_overflow_check = IsLeafMethod()
   1265       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
   1266   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
   1267 
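           // Probe the stack below the reserved region: the load faults if the method would
           // overflow the stack, and the fault handler raises StackOverflowError. The stack
           // map recorded here attributes that fault to the method entry.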
   1268   if (!skip_overflow_check) {
   1269     __ testq(CpuRegister(RAX), Address(
   1270         CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
   1271     RecordPcInfo(nullptr, 0);
   1272   }
   1273 
   1274   if (HasEmptyFrame()) {
   1275     return;
   1276   }
   1277 
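           // Push the allocated callee-save core registers and emit matching CFI so that
           // the unwinder can find and restore them.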
   1278   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
   1279     Register reg = kCoreCalleeSaves[i];
   1280     if (allocated_registers_.ContainsCoreRegister(reg)) {
   1281       __ pushq(CpuRegister(reg));
   1282       __ cfi().AdjustCFAOffset(kX86_64WordSize);
   1283       __ cfi().RelOffset(DWARFReg(reg), 0);
   1284     }
   1285   }
   1286 
   1287   int adjust = GetFrameSize() - GetCoreSpillSize();
   1288   __ subq(CpuRegister(RSP), Immediate(adjust));
   1289   __ cfi().AdjustCFAOffset(adjust);
   1290   uint32_t xmm_spill_location = GetFpuSpillStart();
   1291   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
   1292 
   1293   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
   1294     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
   1295       int offset = xmm_spill_location + (xmm_spill_slot_size * i);
   1296       __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
   1297       __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
   1298     }
   1299   }
   1300 
   1301   // Save the current method if we need it. Note that we do not
   1302   // do this in HCurrentMethod, as the instruction might have been removed
   1303   // in the SSA graph.
   1304   if (RequiresCurrentMethod()) {
   1305     __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
   1306             CpuRegister(kMethodRegisterArgument));
   1307   }
   1308 
   1309   if (GetGraph()->HasShouldDeoptimizeFlag()) {
   1310     // Initialize should_deoptimize flag to 0.
   1311     __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
   1312   }
   1313 }
   1314 
   1315 void CodeGeneratorX86_64::GenerateFrameExit() {
   1316   __ cfi().RememberState();
   1317   if (!HasEmptyFrame()) {
   1318     uint32_t xmm_spill_location = GetFpuSpillStart();
   1319     size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
   1320     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
   1321       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
   1322         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
   1323         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
   1324         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
   1325       }
   1326     }
   1327 
   1328     int adjust = GetFrameSize() - GetCoreSpillSize();
   1329     __ addq(CpuRegister(RSP), Immediate(adjust));
   1330     __ cfi().AdjustCFAOffset(-adjust);
   1331 
   1332     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
   1333       Register reg = kCoreCalleeSaves[i];
   1334       if (allocated_registers_.ContainsCoreRegister(reg)) {
   1335         __ popq(CpuRegister(reg));
   1336         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
   1337         __ cfi().Restore(DWARFReg(reg));
   1338       }
   1339     }
   1340   }
   1341   __ ret();
   1342   __ cfi().RestoreState();
   1343   __ cfi().DefCFAOffset(GetFrameSize());
   1344 }
   1345 
   1346 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
   1347   __ Bind(GetLabelOf(block));
   1348 }
   1349 
   1350 void CodeGeneratorX86_64::Move(Location destination, Location source) {
   1351   if (source.Equals(destination)) {
   1352     return;
   1353   }
   1354   if (destination.IsRegister()) {
   1355     CpuRegister dest = destination.AsRegister<CpuRegister>();
   1356     if (source.IsRegister()) {
   1357       __ movq(dest, source.AsRegister<CpuRegister>());
   1358     } else if (source.IsFpuRegister()) {
   1359       __ movd(dest, source.AsFpuRegister<XmmRegister>());
   1360     } else if (source.IsStackSlot()) {
   1361       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1362     } else if (source.IsConstant()) {
   1363       HConstant* constant = source.GetConstant();
   1364       if (constant->IsLongConstant()) {
   1365         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
   1366       } else {
   1367         Load32BitValue(dest, GetInt32ValueOf(constant));
   1368       }
   1369     } else {
   1370       DCHECK(source.IsDoubleStackSlot());
   1371       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1372     }
   1373   } else if (destination.IsFpuRegister()) {
   1374     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   1375     if (source.IsRegister()) {
   1376       __ movd(dest, source.AsRegister<CpuRegister>());
   1377     } else if (source.IsFpuRegister()) {
   1378       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
   1379     } else if (source.IsConstant()) {
   1380       HConstant* constant = source.GetConstant();
   1381       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
   1382       if (constant->IsFloatConstant()) {
   1383         Load32BitValue(dest, static_cast<int32_t>(value));
   1384       } else {
   1385         Load64BitValue(dest, value);
   1386       }
   1387     } else if (source.IsStackSlot()) {
   1388       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1389     } else {
   1390       DCHECK(source.IsDoubleStackSlot());
   1391       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1392     }
   1393   } else if (destination.IsStackSlot()) {
   1394     if (source.IsRegister()) {
   1395       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1396               source.AsRegister<CpuRegister>());
   1397     } else if (source.IsFpuRegister()) {
   1398       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1399                source.AsFpuRegister<XmmRegister>());
   1400     } else if (source.IsConstant()) {
   1401       HConstant* constant = source.GetConstant();
   1402       int32_t value = GetInt32ValueOf(constant);
   1403       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
   1404     } else {
   1405       DCHECK(source.IsStackSlot()) << source;
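               // x86-64 has no memory-to-memory move, so go through the scratch register TMP.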
   1406       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   1407       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   1408     }
   1409   } else {
   1410     DCHECK(destination.IsDoubleStackSlot());
   1411     if (source.IsRegister()) {
   1412       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1413               source.AsRegister<CpuRegister>());
   1414     } else if (source.IsFpuRegister()) {
   1415       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1416                source.AsFpuRegister<XmmRegister>());
   1417     } else if (source.IsConstant()) {
   1418       HConstant* constant = source.GetConstant();
   1419       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
   1420       int64_t value = GetInt64ValueOf(constant);
   1421       Store64BitValueToStack(destination, value);
   1422     } else {
   1423       DCHECK(source.IsDoubleStackSlot());
   1424       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   1425       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   1426     }
   1427   }
   1428 }
   1429 
   1430 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
   1431   DCHECK(location.IsRegister());
   1432   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
   1433 }
   1434 
   1435 void CodeGeneratorX86_64::MoveLocation(
   1436     Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
   1437   Move(dst, src);
   1438 }
   1439 
   1440 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
   1441   if (location.IsRegister()) {
   1442     locations->AddTemp(location);
   1443   } else {
   1444     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
   1445   }
   1446 }
   1447 
   1448 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   1449   DCHECK(!successor->IsExitBlock());
   1450 
   1451   HBasicBlock* block = got->GetBlock();
   1452   HInstruction* previous = got->GetPrevious();
   1453 
   1454   HLoopInformation* info = block->GetLoopInformation();
   1455   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
   1456     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
   1457     return;
   1458   }
   1459 
   1460   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
   1461     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
   1462   }
   1463   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
   1464     __ jmp(codegen_->GetLabelOf(successor));
   1465   }
   1466 }
   1467 
   1468 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
   1469   got->SetLocations(nullptr);
   1470 }
   1471 
   1472 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
   1473   HandleGoto(got, got->GetSuccessor());
   1474 }
   1475 
   1476 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
   1477   try_boundary->SetLocations(nullptr);
   1478 }
   1479 
   1480 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
   1481   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
   1482   if (!successor->IsExitBlock()) {
   1483     HandleGoto(try_boundary, successor);
   1484   }
   1485 }
   1486 
   1487 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
   1488   exit->SetLocations(nullptr);
   1489 }
   1490 
   1491 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
   1492 }
   1493 
   1494 template<class LabelType>
   1495 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
   1496                                                      LabelType* true_label,
   1497                                                      LabelType* false_label) {
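           // The preceding ucomiss/ucomisd sets ZF, PF and CF when the comparison is
           // unordered (NaN), so dispatch the unordered case to the true or false target
           // first, according to the condition's NaN bias, before testing the ordered
           // condition.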
   1498   if (cond->IsFPConditionTrueIfNaN()) {
   1499     __ j(kUnordered, true_label);
   1500   } else if (cond->IsFPConditionFalseIfNaN()) {
   1501     __ j(kUnordered, false_label);
   1502   }
   1503   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
   1504 }
   1505 
   1506 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
   1507   LocationSummary* locations = condition->GetLocations();
   1508 
   1509   Location left = locations->InAt(0);
   1510   Location right = locations->InAt(1);
   1511   Primitive::Type type = condition->InputAt(0)->GetType();
   1512   switch (type) {
   1513     case Primitive::kPrimBoolean:
   1514     case Primitive::kPrimByte:
   1515     case Primitive::kPrimChar:
   1516     case Primitive::kPrimShort:
   1517     case Primitive::kPrimInt:
   1518     case Primitive::kPrimNot: {
   1519       codegen_->GenerateIntCompare(left, right);
   1520       break;
   1521     }
   1522     case Primitive::kPrimLong: {
   1523       codegen_->GenerateLongCompare(left, right);
   1524       break;
   1525     }
   1526     case Primitive::kPrimFloat: {
   1527       if (right.IsFpuRegister()) {
   1528         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
   1529       } else if (right.IsConstant()) {
   1530         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
   1531                    codegen_->LiteralFloatAddress(
   1532                      right.GetConstant()->AsFloatConstant()->GetValue()));
   1533       } else {
   1534         DCHECK(right.IsStackSlot());
   1535         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
   1536                    Address(CpuRegister(RSP), right.GetStackIndex()));
   1537       }
   1538       break;
   1539     }
   1540     case Primitive::kPrimDouble: {
   1541       if (right.IsFpuRegister()) {
   1542         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
   1543       } else if (right.IsConstant()) {
   1544         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
   1545                    codegen_->LiteralDoubleAddress(
   1546                      right.GetConstant()->AsDoubleConstant()->GetValue()));
   1547       } else {
   1548         DCHECK(right.IsDoubleStackSlot());
   1549         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
   1550                    Address(CpuRegister(RSP), right.GetStackIndex()));
   1551       }
   1552       break;
   1553     }
   1554     default:
   1555       LOG(FATAL) << "Unexpected condition type " << type;
   1556   }
   1557 }
   1558 
   1559 template<class LabelType>
   1560 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
   1561                                                                   LabelType* true_target_in,
   1562                                                                   LabelType* false_target_in) {
    1563   // Generated branching requires both targets to be explicit. If either of the
    1564   // targets is nullptr (i.e. a fallthrough), use and bind `fallthrough_target` instead.
   1565   LabelType fallthrough_target;
   1566   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
   1567   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
   1568 
   1569   // Generate the comparison to set the CC.
   1570   GenerateCompareTest(condition);
   1571 
   1572   // Now generate the correct jump(s).
   1573   Primitive::Type type = condition->InputAt(0)->GetType();
   1574   switch (type) {
   1575     case Primitive::kPrimLong: {
   1576       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
   1577       break;
   1578     }
   1579     case Primitive::kPrimFloat: {
   1580       GenerateFPJumps(condition, true_target, false_target);
   1581       break;
   1582     }
   1583     case Primitive::kPrimDouble: {
   1584       GenerateFPJumps(condition, true_target, false_target);
   1585       break;
   1586     }
   1587     default:
   1588       LOG(FATAL) << "Unexpected condition type " << type;
   1589   }
   1590 
   1591   if (false_target != &fallthrough_target) {
   1592     __ jmp(false_target);
   1593   }
   1594 
   1595   if (fallthrough_target.IsLinked()) {
   1596     __ Bind(&fallthrough_target);
   1597   }
   1598 }
   1599 
   1600 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
    1601   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS are
    1602   // only reliable when `cond` is the instruction immediately preceding `branch`.
    1603   // We can't reuse the eflags for materialized FP conditions due to their NaN branching.
   1604   return cond->IsCondition() &&
   1605          cond->GetNext() == branch &&
   1606          !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
   1607 }
   1608 
   1609 template<class LabelType>
   1610 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
   1611                                                            size_t condition_input_index,
   1612                                                            LabelType* true_target,
   1613                                                            LabelType* false_target) {
   1614   HInstruction* cond = instruction->InputAt(condition_input_index);
   1615 
   1616   if (true_target == nullptr && false_target == nullptr) {
   1617     // Nothing to do. The code always falls through.
   1618     return;
   1619   } else if (cond->IsIntConstant()) {
   1620     // Constant condition, statically compared against "true" (integer value 1).
   1621     if (cond->AsIntConstant()->IsTrue()) {
   1622       if (true_target != nullptr) {
   1623         __ jmp(true_target);
   1624       }
   1625     } else {
   1626       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
   1627       if (false_target != nullptr) {
   1628         __ jmp(false_target);
   1629       }
   1630     }
   1631     return;
   1632   }
   1633 
   1634   // The following code generates these patterns:
   1635   //  (1) true_target == nullptr && false_target != nullptr
   1636   //        - opposite condition true => branch to false_target
   1637   //  (2) true_target != nullptr && false_target == nullptr
   1638   //        - condition true => branch to true_target
   1639   //  (3) true_target != nullptr && false_target != nullptr
   1640   //        - condition true => branch to true_target
   1641   //        - branch to false_target
   1642   if (IsBooleanValueOrMaterializedCondition(cond)) {
   1643     if (AreEflagsSetFrom(cond, instruction)) {
   1644       if (true_target == nullptr) {
   1645         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
   1646       } else {
   1647         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
   1648       }
   1649     } else {
   1650       // Materialized condition, compare against 0.
   1651       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
   1652       if (lhs.IsRegister()) {
   1653         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
   1654       } else {
   1655         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
   1656       }
   1657       if (true_target == nullptr) {
   1658         __ j(kEqual, false_target);
   1659       } else {
   1660         __ j(kNotEqual, true_target);
   1661       }
   1662     }
   1663   } else {
   1664     // Condition has not been materialized, use its inputs as the
   1665     // comparison and its condition as the branch condition.
   1666     HCondition* condition = cond->AsCondition();
   1667 
   1668     // If this is a long or FP comparison that has been folded into
   1669     // the HCondition, generate the comparison directly.
   1670     Primitive::Type type = condition->InputAt(0)->GetType();
   1671     if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
   1672       GenerateCompareTestAndBranch(condition, true_target, false_target);
   1673       return;
   1674     }
   1675 
   1676     Location lhs = condition->GetLocations()->InAt(0);
   1677     Location rhs = condition->GetLocations()->InAt(1);
   1678     codegen_->GenerateIntCompare(lhs, rhs);
    1679     if (true_target == nullptr) {
   1680       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
   1681     } else {
   1682       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
   1683     }
   1684   }
   1685 
   1686   // If neither branch falls through (case 3), the conditional branch to `true_target`
   1687   // was already emitted (case 2) and we need to emit a jump to `false_target`.
   1688   if (true_target != nullptr && false_target != nullptr) {
   1689     __ jmp(false_target);
   1690   }
   1691 }
   1692 
   1693 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
   1694   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
   1695   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
   1696     locations->SetInAt(0, Location::Any());
   1697   }
   1698 }
   1699 
   1700 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
   1701   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   1702   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
   1703   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
   1704       nullptr : codegen_->GetLabelOf(true_successor);
   1705   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
   1706       nullptr : codegen_->GetLabelOf(false_successor);
   1707   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
   1708 }
   1709 
   1710 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   1711   LocationSummary* locations = new (GetGraph()->GetArena())
   1712       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   1713   InvokeRuntimeCallingConvention calling_convention;
   1714   RegisterSet caller_saves = RegisterSet::Empty();
   1715   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1716   locations->SetCustomSlowPathCallerSaves(caller_saves);
   1717   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
   1718     locations->SetInAt(0, Location::Any());
   1719   }
   1720 }
   1721 
   1722 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   1723   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
   1724   GenerateTestAndBranch<Label>(deoptimize,
   1725                                /* condition_input_index */ 0,
   1726                                slow_path->GetEntryLabel(),
   1727                                /* false_target */ nullptr);
   1728 }
   1729 
   1730 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
   1731   LocationSummary* locations = new (GetGraph()->GetArena())
   1732       LocationSummary(flag, LocationSummary::kNoCall);
   1733   locations->SetOut(Location::RequiresRegister());
   1734 }
   1735 
   1736 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
   1737   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
   1738           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
   1739 }
   1740 
   1741 static bool SelectCanUseCMOV(HSelect* select) {
   1742   // There are no conditional move instructions for XMMs.
   1743   if (Primitive::IsFloatingPointType(select->GetType())) {
   1744     return false;
   1745   }
   1746 
    1747   // An FP condition doesn't generate the single CC that we need.
   1748   HInstruction* condition = select->GetCondition();
   1749   if (condition->IsCondition() &&
   1750       Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
   1751     return false;
   1752   }
   1753 
   1754   // We can generate a CMOV for this Select.
   1755   return true;
   1756 }
   1757 
   1758 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
   1759   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   1760   if (Primitive::IsFloatingPointType(select->GetType())) {
   1761     locations->SetInAt(0, Location::RequiresFpuRegister());
   1762     locations->SetInAt(1, Location::Any());
   1763   } else {
   1764     locations->SetInAt(0, Location::RequiresRegister());
   1765     if (SelectCanUseCMOV(select)) {
   1766       if (select->InputAt(1)->IsConstant()) {
   1767         locations->SetInAt(1, Location::RequiresRegister());
   1768       } else {
   1769         locations->SetInAt(1, Location::Any());
   1770       }
   1771     } else {
   1772       locations->SetInAt(1, Location::Any());
   1773     }
   1774   }
   1775   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
   1776     locations->SetInAt(2, Location::RequiresRegister());
   1777   }
   1778   locations->SetOut(Location::SameAsFirstInput());
   1779 }
   1780 
   1781 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
   1782   LocationSummary* locations = select->GetLocations();
   1783   if (SelectCanUseCMOV(select)) {
   1784     // If both the condition and the source types are integer, we can generate
   1785     // a CMOV to implement Select.
   1786     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
   1787     Location value_true_loc = locations->InAt(1);
   1788     DCHECK(locations->InAt(0).Equals(locations->Out()));
   1789 
   1790     HInstruction* select_condition = select->GetCondition();
   1791     Condition cond = kNotEqual;
   1792 
   1793     // Figure out how to test the 'condition'.
   1794     if (select_condition->IsCondition()) {
   1795       HCondition* condition = select_condition->AsCondition();
   1796       if (!condition->IsEmittedAtUseSite()) {
   1797         // This was a previously materialized condition.
   1798         // Can we use the existing condition code?
   1799         if (AreEflagsSetFrom(condition, select)) {
   1800           // Materialization was the previous instruction.  Condition codes are right.
   1801           cond = X86_64IntegerCondition(condition->GetCondition());
   1802         } else {
   1803           // No, we have to recreate the condition code.
   1804           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
   1805           __ testl(cond_reg, cond_reg);
   1806         }
   1807       } else {
   1808         GenerateCompareTest(condition);
   1809         cond = X86_64IntegerCondition(condition->GetCondition());
   1810       }
   1811     } else {
   1812       // Must be a Boolean condition, which needs to be compared to 0.
   1813       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
   1814       __ testl(cond_reg, cond_reg);
   1815     }
   1816 
   1817     // If the condition is true, overwrite the output, which already contains false.
   1818     // Generate the correct sized CMOV.
   1819     bool is_64_bit = Primitive::Is64BitType(select->GetType());
   1820     if (value_true_loc.IsRegister()) {
   1821       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
   1822     } else {
   1823       __ cmov(cond,
   1824               value_false,
   1825               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
   1826     }
   1827   } else {
   1828     NearLabel false_target;
   1829     GenerateTestAndBranch<NearLabel>(select,
   1830                                      /* condition_input_index */ 2,
   1831                                      /* true_target */ nullptr,
   1832                                      &false_target);
   1833     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
   1834     __ Bind(&false_target);
   1835   }
   1836 }
   1837 
   1838 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   1839   new (GetGraph()->GetArena()) LocationSummary(info);
   1840 }
   1841 
   1842 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
   1843   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
   1844 }
   1845 
   1846 void CodeGeneratorX86_64::GenerateNop() {
   1847   __ nop();
   1848 }
   1849 
   1850 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   1851   LocationSummary* locations =
   1852       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   1853   // Handle the long/FP comparisons made in instruction simplification.
   1854   switch (cond->InputAt(0)->GetType()) {
   1855     case Primitive::kPrimLong:
   1856       locations->SetInAt(0, Location::RequiresRegister());
   1857       locations->SetInAt(1, Location::Any());
   1858       break;
   1859     case Primitive::kPrimFloat:
   1860     case Primitive::kPrimDouble:
   1861       locations->SetInAt(0, Location::RequiresFpuRegister());
   1862       locations->SetInAt(1, Location::Any());
   1863       break;
   1864     default:
   1865       locations->SetInAt(0, Location::RequiresRegister());
   1866       locations->SetInAt(1, Location::Any());
   1867       break;
   1868   }
   1869   if (!cond->IsEmittedAtUseSite()) {
   1870     locations->SetOut(Location::RequiresRegister());
   1871   }
   1872 }
   1873 
   1874 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
   1875   if (cond->IsEmittedAtUseSite()) {
   1876     return;
   1877   }
   1878 
   1879   LocationSummary* locations = cond->GetLocations();
   1880   Location lhs = locations->InAt(0);
   1881   Location rhs = locations->InAt(1);
   1882   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
   1883   NearLabel true_label, false_label;
   1884 
   1885   switch (cond->InputAt(0)->GetType()) {
   1886     default:
   1887       // Integer case.
   1888 
   1889       // Clear output register: setcc only sets the low byte.
   1890       __ xorl(reg, reg);
   1891 
   1892       codegen_->GenerateIntCompare(lhs, rhs);
   1893       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
   1894       return;
   1895     case Primitive::kPrimLong:
   1896       // Clear output register: setcc only sets the low byte.
   1897       __ xorl(reg, reg);
   1898 
   1899       codegen_->GenerateLongCompare(lhs, rhs);
   1900       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
   1901       return;
   1902     case Primitive::kPrimFloat: {
   1903       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
   1904       if (rhs.IsConstant()) {
   1905         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
   1906         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
   1907       } else if (rhs.IsStackSlot()) {
   1908         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1909       } else {
   1910         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
   1911       }
   1912       GenerateFPJumps(cond, &true_label, &false_label);
   1913       break;
   1914     }
   1915     case Primitive::kPrimDouble: {
   1916       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
   1917       if (rhs.IsConstant()) {
   1918         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
   1919         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
   1920       } else if (rhs.IsDoubleStackSlot()) {
   1921         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1922       } else {
   1923         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
   1924       }
   1925       GenerateFPJumps(cond, &true_label, &false_label);
   1926       break;
   1927     }
   1928   }
   1929 
   1930   // Convert the jumps into the result.
   1931   NearLabel done_label;
   1932 
   1933   // False case: result = 0.
   1934   __ Bind(&false_label);
   1935   __ xorl(reg, reg);
   1936   __ jmp(&done_label);
   1937 
   1938   // True case: result = 1.
   1939   __ Bind(&true_label);
   1940   __ movl(reg, Immediate(1));
   1941   __ Bind(&done_label);
   1942 }
   1943 
   1944 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
   1945   HandleCondition(comp);
   1946 }
   1947 
   1948 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
   1949   HandleCondition(comp);
   1950 }
   1951 
   1952 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
   1953   HandleCondition(comp);
   1954 }
   1955 
   1956 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
   1957   HandleCondition(comp);
   1958 }
   1959 
   1960 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
   1961   HandleCondition(comp);
   1962 }
   1963 
   1964 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
   1965   HandleCondition(comp);
   1966 }
   1967 
   1968 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
   1969   HandleCondition(comp);
   1970 }
   1971 
   1972 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
   1973   HandleCondition(comp);
   1974 }
   1975 
   1976 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
   1977   HandleCondition(comp);
   1978 }
   1979 
   1980 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
   1981   HandleCondition(comp);
   1982 }
   1983 
   1984 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
   1985   HandleCondition(comp);
   1986 }
   1987 
   1988 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
   1989   HandleCondition(comp);
   1990 }
   1991 
   1992 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
   1993   HandleCondition(comp);
   1994 }
   1995 
   1996 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
   1997   HandleCondition(comp);
   1998 }
   1999 
   2000 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
   2001   HandleCondition(comp);
   2002 }
   2003 
   2004 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
   2005   HandleCondition(comp);
   2006 }
   2007 
   2008 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
   2009   HandleCondition(comp);
   2010 }
   2011 
   2012 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
   2013   HandleCondition(comp);
   2014 }
   2015 
   2016 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
   2017   HandleCondition(comp);
   2018 }
   2019 
   2020 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
   2021   HandleCondition(comp);
   2022 }
   2023 
   2024 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   2025   LocationSummary* locations =
   2026       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   2027   switch (compare->InputAt(0)->GetType()) {
   2028     case Primitive::kPrimBoolean:
   2029     case Primitive::kPrimByte:
   2030     case Primitive::kPrimShort:
   2031     case Primitive::kPrimChar:
   2032     case Primitive::kPrimInt:
   2033     case Primitive::kPrimLong: {
   2034       locations->SetInAt(0, Location::RequiresRegister());
   2035       locations->SetInAt(1, Location::Any());
   2036       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2037       break;
   2038     }
   2039     case Primitive::kPrimFloat:
   2040     case Primitive::kPrimDouble: {
   2041       locations->SetInAt(0, Location::RequiresFpuRegister());
   2042       locations->SetInAt(1, Location::Any());
   2043       locations->SetOut(Location::RequiresRegister());
   2044       break;
   2045     }
   2046     default:
   2047       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
   2048   }
   2049 }
   2050 
   2051 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
   2052   LocationSummary* locations = compare->GetLocations();
   2053   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   2054   Location left = locations->InAt(0);
   2055   Location right = locations->InAt(1);
   2056 
   2057   NearLabel less, greater, done;
   2058   Primitive::Type type = compare->InputAt(0)->GetType();
   2059   Condition less_cond = kLess;
   2060 
   2061   switch (type) {
   2062     case Primitive::kPrimBoolean:
   2063     case Primitive::kPrimByte:
   2064     case Primitive::kPrimShort:
   2065     case Primitive::kPrimChar:
   2066     case Primitive::kPrimInt: {
   2067       codegen_->GenerateIntCompare(left, right);
   2068       break;
   2069     }
   2070     case Primitive::kPrimLong: {
   2071       codegen_->GenerateLongCompare(left, right);
   2072       break;
   2073     }
   2074     case Primitive::kPrimFloat: {
   2075       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
   2076       if (right.IsConstant()) {
   2077         float value = right.GetConstant()->AsFloatConstant()->GetValue();
   2078         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
   2079       } else if (right.IsStackSlot()) {
   2080         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   2081       } else {
   2082         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
   2083       }
   2084       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
   2085       less_cond = kBelow;  //  ucomis{s,d} sets CF
   2086       break;
   2087     }
   2088     case Primitive::kPrimDouble: {
   2089       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
   2090       if (right.IsConstant()) {
   2091         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
   2092         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
   2093       } else if (right.IsDoubleStackSlot()) {
   2094         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   2095       } else {
   2096         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
   2097       }
   2098       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
   2099       less_cond = kBelow;  //  ucomis{s,d} sets CF
   2100       break;
   2101     }
   2102     default:
   2103       LOG(FATAL) << "Unexpected compare type " << type;
   2104   }
   2105 
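           // Materialize the -1/0/1 result from the flags set above; for the FP cases,
           // `less_cond` was switched to kBelow because ucomis{s,d} signals "less than" via CF.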
   2106   __ movl(out, Immediate(0));
   2107   __ j(kEqual, &done);
   2108   __ j(less_cond, &less);
   2109 
   2110   __ Bind(&greater);
   2111   __ movl(out, Immediate(1));
   2112   __ jmp(&done);
   2113 
   2114   __ Bind(&less);
   2115   __ movl(out, Immediate(-1));
   2116 
   2117   __ Bind(&done);
   2118 }
   2119 
   2120 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
   2121   LocationSummary* locations =
   2122       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   2123   locations->SetOut(Location::ConstantLocation(constant));
   2124 }
   2125 
   2126 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   2127   // Will be generated at use site.
   2128 }
   2129 
   2130 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
   2131   LocationSummary* locations =
   2132       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   2133   locations->SetOut(Location::ConstantLocation(constant));
   2134 }
   2135 
   2136 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   2137   // Will be generated at use site.
   2138 }
   2139 
   2140 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
   2141   LocationSummary* locations =
   2142       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   2143   locations->SetOut(Location::ConstantLocation(constant));
   2144 }
   2145 
   2146 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
   2147   // Will be generated at use site.
   2148 }
   2149 
   2150 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
   2151   LocationSummary* locations =
   2152       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   2153   locations->SetOut(Location::ConstantLocation(constant));
   2154 }
   2155 
   2156 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   2157   // Will be generated at use site.
   2158 }
   2159 
   2160 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
   2161   LocationSummary* locations =
   2162       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   2163   locations->SetOut(Location::ConstantLocation(constant));
   2164 }
   2165 
   2166 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
   2167     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   2168   // Will be generated at use site.
   2169 }
   2170 
   2171 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
   2172   constructor_fence->SetLocations(nullptr);
   2173 }
   2174 
   2175 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
   2176     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
   2177   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   2178 }
   2179 
   2180 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   2181   memory_barrier->SetLocations(nullptr);
   2182 }
   2183 
   2184 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   2185   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
   2186 }
   2187 
   2188 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
   2189   ret->SetLocations(nullptr);
   2190 }
   2191 
   2192 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
   2193   codegen_->GenerateFrameExit();
   2194 }
   2195 
   2196 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
   2197   LocationSummary* locations =
   2198       new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
   2199   switch (ret->InputAt(0)->GetType()) {
   2200     case Primitive::kPrimBoolean:
   2201     case Primitive::kPrimByte:
   2202     case Primitive::kPrimChar:
   2203     case Primitive::kPrimShort:
   2204     case Primitive::kPrimInt:
   2205     case Primitive::kPrimNot:
   2206     case Primitive::kPrimLong:
   2207       locations->SetInAt(0, Location::RegisterLocation(RAX));
   2208       break;
   2209 
   2210     case Primitive::kPrimFloat:
   2211     case Primitive::kPrimDouble:
   2212       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
   2213       break;
   2214 
   2215     default:
   2216       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
   2217   }
   2218 }
   2219 
   2220 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
   2221   if (kIsDebugBuild) {
   2222     switch (ret->InputAt(0)->GetType()) {
   2223       case Primitive::kPrimBoolean:
   2224       case Primitive::kPrimByte:
   2225       case Primitive::kPrimChar:
   2226       case Primitive::kPrimShort:
   2227       case Primitive::kPrimInt:
   2228       case Primitive::kPrimNot:
   2229       case Primitive::kPrimLong:
   2230         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
   2231         break;
   2232 
   2233       case Primitive::kPrimFloat:
   2234       case Primitive::kPrimDouble:
   2235         DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
   2236                   XMM0);
   2237         break;
   2238 
   2239       default:
   2240         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
   2241     }
   2242   }
   2243   codegen_->GenerateFrameExit();
   2244 }
   2245 
   2246 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
   2247   switch (type) {
   2248     case Primitive::kPrimBoolean:
   2249     case Primitive::kPrimByte:
   2250     case Primitive::kPrimChar:
   2251     case Primitive::kPrimShort:
   2252     case Primitive::kPrimInt:
   2253     case Primitive::kPrimNot:
   2254     case Primitive::kPrimLong:
   2255       return Location::RegisterLocation(RAX);
   2256 
   2257     case Primitive::kPrimVoid:
   2258       return Location::NoLocation();
   2259 
   2260     case Primitive::kPrimDouble:
   2261     case Primitive::kPrimFloat:
   2262       return Location::FpuRegisterLocation(XMM0);
   2263   }
   2264 
   2265   UNREACHABLE();
   2266 }
   2267 
   2268 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
   2269   return Location::RegisterLocation(kMethodRegisterArgument);
   2270 }
   2271 
   2272 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
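           // gp_index_/float_index_ select the next core/FP argument register; stack_index_
           // tracks the stack slot position (64-bit values account for two slots), used once
           // the argument registers are exhausted.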
   2273   switch (type) {
   2274     case Primitive::kPrimBoolean:
   2275     case Primitive::kPrimByte:
   2276     case Primitive::kPrimChar:
   2277     case Primitive::kPrimShort:
   2278     case Primitive::kPrimInt:
   2279     case Primitive::kPrimNot: {
   2280       uint32_t index = gp_index_++;
   2281       stack_index_++;
   2282       if (index < calling_convention.GetNumberOfRegisters()) {
   2283         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
   2284       } else {
   2285         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
   2286       }
   2287     }
   2288 
   2289     case Primitive::kPrimLong: {
   2290       uint32_t index = gp_index_;
   2291       stack_index_ += 2;
   2292       if (index < calling_convention.GetNumberOfRegisters()) {
   2293         gp_index_ += 1;
   2294         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
   2295       } else {
   2296         gp_index_ += 2;
   2297         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
   2298       }
   2299     }
   2300 
   2301     case Primitive::kPrimFloat: {
   2302       uint32_t index = float_index_++;
   2303       stack_index_++;
   2304       if (index < calling_convention.GetNumberOfFpuRegisters()) {
   2305         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
   2306       } else {
   2307         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
   2308       }
   2309     }
   2310 
   2311     case Primitive::kPrimDouble: {
   2312       uint32_t index = float_index_++;
   2313       stack_index_ += 2;
   2314       if (index < calling_convention.GetNumberOfFpuRegisters()) {
   2315         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
   2316       } else {
   2317         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
   2318       }
   2319     }
   2320 
   2321     case Primitive::kPrimVoid:
   2322       LOG(FATAL) << "Unexpected parameter type " << type;
   2323       break;
   2324   }
   2325   return Location::NoLocation();
   2326 }
   2327 
   2328 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
    2329   // The trampoline uses the same calling convention as the dex calling convention,
    2330   // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will
    2331   // contain the method_idx.
   2332   HandleInvoke(invoke);
   2333 }
   2334 
   2335 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   2336   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
   2337 }
   2338 
   2339 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   2340   // Explicit clinit checks triggered by static invokes must have been pruned by
   2341   // art::PrepareForRegisterAllocation.
   2342   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   2343 
   2344   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   2345   if (intrinsic.TryDispatch(invoke)) {
   2346     return;
   2347   }
   2348 
   2349   HandleInvoke(invoke);
   2350 }
   2351 
   2352 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
   2353   if (invoke->GetLocations()->Intrinsified()) {
   2354     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
   2355     intrinsic.Dispatch(invoke);
   2356     return true;
   2357   }
   2358   return false;
   2359 }
   2360 
   2361 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   2362   // Explicit clinit checks triggered by static invokes must have been pruned by
   2363   // art::PrepareForRegisterAllocation.
   2364   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   2365 
   2366   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
   2367     return;
   2368   }
   2369 
   2370   LocationSummary* locations = invoke->GetLocations();
   2371   codegen_->GenerateStaticOrDirectCall(
   2372       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
   2373 }
   2374 
   2375 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
   2376   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
   2377   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
   2378 }
   2379 
   2380 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   2381   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   2382   if (intrinsic.TryDispatch(invoke)) {
   2383     return;
   2384   }
   2385 
   2386   HandleInvoke(invoke);
   2387 }
   2388 
   2389 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   2390   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
   2391     return;
   2392   }
   2393 
   2394   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   2395   DCHECK(!codegen_->IsLeafMethod());
   2396 }
   2397 
   2398 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   2399   HandleInvoke(invoke);
   2400   // Add the hidden argument.
   2401   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
   2402 }
   2403 
   2404 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   2405   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   2406   LocationSummary* locations = invoke->GetLocations();
   2407   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   2408   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
   2409   Location receiver = locations->InAt(0);
   2410   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
   2411 
    2412   // Set the hidden argument. It is safe to do this here, as RAX
   2413   // won't be modified thereafter, before the `call` instruction.
   2414   DCHECK_EQ(RAX, hidden_reg.AsRegister());
   2415   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
   2416 
   2417   if (receiver.IsStackSlot()) {
   2418     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
   2419     // /* HeapReference<Class> */ temp = temp->klass_
   2420     __ movl(temp, Address(temp, class_offset));
   2421   } else {
   2422     // /* HeapReference<Class> */ temp = receiver->klass_
   2423     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   2424   }
   2425   codegen_->MaybeRecordImplicitNullCheck(invoke);
   2426   // Instead of simply (possibly) unpoisoning `temp` here, we should
   2427   // emit a read barrier for the previous class reference load.
    2428   // However, this is not required in practice, as this is an
    2429   // intermediate/temporary reference and because the current
    2430   // concurrent copying collector keeps the from-space memory
    2431   // intact/accessible until the end of the marking phase (future
    2432   // concurrent copying collectors may not).
   2433   __ MaybeUnpoisonHeapReference(temp);
   2434   // temp = temp->GetAddressOfIMT()
   2435   __ movq(temp,
   2436       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
   2438   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   2439       invoke->GetImtIndex(), kX86_64PointerSize));
   2440   // temp = temp->GetImtEntryAt(method_offset);
   2441   __ movq(temp, Address(temp, method_offset));
   2442   // call temp->GetEntryPoint();
   2443   __ call(Address(
   2444       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
   2445 
   2446   DCHECK(!codegen_->IsLeafMethod());
   2447   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   2448 }
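
// Illustrative sketch of the sequence emitted above for an interface call
// (mnemonic form only; the offsets are the constants computed above):
//
//   movl  temp, [receiver + class_offset]     // temp = receiver->klass_
//   movq  temp, [temp + ImtPtrOffset]         // temp = klass->GetAddressOfIMT()
//   movq  temp, [temp + method_offset]        // temp = imt entry at imt_index
//   call  [temp + entry_point_offset]         // jump to the ArtMethod's code
//
// with RAX holding the dex method index as the hidden argument, used to
// disambiguate when an IMT slot is shared by several interface methods.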
   2449 
   2450 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   2451   HandleInvoke(invoke);
   2452 }
   2453 
   2454 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   2455   codegen_->GenerateInvokePolymorphicCall(invoke);
   2456 }
   2457 
   2458 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
   2459   LocationSummary* locations =
   2460       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
   2461   switch (neg->GetResultType()) {
   2462     case Primitive::kPrimInt:
   2463     case Primitive::kPrimLong:
   2464       locations->SetInAt(0, Location::RequiresRegister());
   2465       locations->SetOut(Location::SameAsFirstInput());
   2466       break;
   2467 
   2468     case Primitive::kPrimFloat:
   2469     case Primitive::kPrimDouble:
   2470       locations->SetInAt(0, Location::RequiresFpuRegister());
   2471       locations->SetOut(Location::SameAsFirstInput());
   2472       locations->AddTemp(Location::RequiresFpuRegister());
   2473       break;
   2474 
   2475     default:
   2476       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
   2477   }
   2478 }
   2479 
   2480 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
   2481   LocationSummary* locations = neg->GetLocations();
   2482   Location out = locations->Out();
   2483   Location in = locations->InAt(0);
   2484   switch (neg->GetResultType()) {
   2485     case Primitive::kPrimInt:
   2486       DCHECK(in.IsRegister());
   2487       DCHECK(in.Equals(out));
   2488       __ negl(out.AsRegister<CpuRegister>());
   2489       break;
   2490 
   2491     case Primitive::kPrimLong:
   2492       DCHECK(in.IsRegister());
   2493       DCHECK(in.Equals(out));
   2494       __ negq(out.AsRegister<CpuRegister>());
   2495       break;
   2496 
   2497     case Primitive::kPrimFloat: {
   2498       DCHECK(in.Equals(out));
   2499       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2500       // Implement float negation with an exclusive or with value
   2501       // 0x80000000 (mask for bit 31, representing the sign of a
   2502       // single-precision floating-point number).
   2503       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
   2504       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
   2505       break;
   2506     }
   2507 
   2508     case Primitive::kPrimDouble: {
   2509       DCHECK(in.Equals(out));
   2510       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2511       // Implement double negation with an exclusive or with value
   2512       // 0x8000000000000000 (mask for bit 63, representing the sign of
   2513       // a double-precision floating-point number).
   2514       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
   2515       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
   2516       break;
   2517     }
   2518 
   2519     default:
   2520       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
   2521   }
   2522 }
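
// Illustrative example of the sign-bit trick used above for FP negation,
// assuming IEEE-754 single precision: XORing bit 31 flips the sign of every
// value, including +/-0.0f and NaN, whereas computing 0.0f - x would leave
// +0.0f unchanged. For instance:
//
//   bits( 1.5f) = 0x3FC00000;  0x3FC00000 ^ 0x80000000 = 0xBFC00000 = bits(-1.5f)
//   bits(+0.0f) = 0x00000000;  0x00000000 ^ 0x80000000 = 0x80000000 = bits(-0.0f)
//
// The double case is identical with bit 63 and mask 0x8000000000000000.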
   2523 
   2524 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
   2525   LocationSummary* locations =
   2526       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   2527   Primitive::Type result_type = conversion->GetResultType();
   2528   Primitive::Type input_type = conversion->GetInputType();
   2529   DCHECK_NE(result_type, input_type);
   2530 
   2531   // The Java language does not allow treating boolean as an integral type but
   2532   // our bit representation makes it safe.
   2533 
   2534   switch (result_type) {
   2535     case Primitive::kPrimByte:
   2536       switch (input_type) {
   2537         case Primitive::kPrimLong:
   2538           // Type conversion from long to byte is a result of code transformations.
   2539         case Primitive::kPrimBoolean:
   2540           // Boolean input is a result of code transformations.
   2541         case Primitive::kPrimShort:
   2542         case Primitive::kPrimInt:
   2543         case Primitive::kPrimChar:
   2544           // Processing a Dex `int-to-byte' instruction.
   2545           locations->SetInAt(0, Location::Any());
   2546           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2547           break;
   2548 
   2549         default:
   2550           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2551                      << " to " << result_type;
   2552       }
   2553       break;
   2554 
   2555     case Primitive::kPrimShort:
   2556       switch (input_type) {
   2557         case Primitive::kPrimLong:
   2558           // Type conversion from long to short is a result of code transformations.
   2559         case Primitive::kPrimBoolean:
   2560           // Boolean input is a result of code transformations.
   2561         case Primitive::kPrimByte:
   2562         case Primitive::kPrimInt:
   2563         case Primitive::kPrimChar:
   2564           // Processing a Dex `int-to-short' instruction.
   2565           locations->SetInAt(0, Location::Any());
   2566           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2567           break;
   2568 
   2569         default:
   2570           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2571                      << " to " << result_type;
   2572       }
   2573       break;
   2574 
   2575     case Primitive::kPrimInt:
   2576       switch (input_type) {
   2577         case Primitive::kPrimLong:
   2578           // Processing a Dex `long-to-int' instruction.
   2579           locations->SetInAt(0, Location::Any());
   2580           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2581           break;
   2582 
   2583         case Primitive::kPrimFloat:
   2584           // Processing a Dex `float-to-int' instruction.
   2585           locations->SetInAt(0, Location::RequiresFpuRegister());
   2586           locations->SetOut(Location::RequiresRegister());
   2587           break;
   2588 
   2589         case Primitive::kPrimDouble:
   2590           // Processing a Dex `double-to-int' instruction.
   2591           locations->SetInAt(0, Location::RequiresFpuRegister());
   2592           locations->SetOut(Location::RequiresRegister());
   2593           break;
   2594 
   2595         default:
   2596           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2597                      << " to " << result_type;
   2598       }
   2599       break;
   2600 
   2601     case Primitive::kPrimLong:
   2602       switch (input_type) {
   2603         case Primitive::kPrimBoolean:
   2604           // Boolean input is a result of code transformations.
   2605         case Primitive::kPrimByte:
   2606         case Primitive::kPrimShort:
   2607         case Primitive::kPrimInt:
   2608         case Primitive::kPrimChar:
   2609           // Processing a Dex `int-to-long' instruction.
   2610           // TODO: We would benefit from a (to-be-implemented)
   2611           // Location::RegisterOrStackSlot requirement for this input.
   2612           locations->SetInAt(0, Location::RequiresRegister());
   2613           locations->SetOut(Location::RequiresRegister());
   2614           break;
   2615 
   2616         case Primitive::kPrimFloat:
   2617           // Processing a Dex `float-to-long' instruction.
   2618           locations->SetInAt(0, Location::RequiresFpuRegister());
   2619           locations->SetOut(Location::RequiresRegister());
   2620           break;
   2621 
   2622         case Primitive::kPrimDouble:
   2623           // Processing a Dex `double-to-long' instruction.
   2624           locations->SetInAt(0, Location::RequiresFpuRegister());
   2625           locations->SetOut(Location::RequiresRegister());
   2626           break;
   2627 
   2628         default:
   2629           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2630                      << " to " << result_type;
   2631       }
   2632       break;
   2633 
   2634     case Primitive::kPrimChar:
   2635       switch (input_type) {
   2636         case Primitive::kPrimLong:
   2637           // Type conversion from long to char is a result of code transformations.
   2638         case Primitive::kPrimBoolean:
   2639           // Boolean input is a result of code transformations.
   2640         case Primitive::kPrimByte:
   2641         case Primitive::kPrimShort:
   2642         case Primitive::kPrimInt:
   2643           // Processing a Dex `int-to-char' instruction.
   2644           locations->SetInAt(0, Location::Any());
   2645           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2646           break;
   2647 
   2648         default:
   2649           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2650                      << " to " << result_type;
   2651       }
   2652       break;
   2653 
   2654     case Primitive::kPrimFloat:
   2655       switch (input_type) {
   2656         case Primitive::kPrimBoolean:
   2657           // Boolean input is a result of code transformations.
   2658         case Primitive::kPrimByte:
   2659         case Primitive::kPrimShort:
   2660         case Primitive::kPrimInt:
   2661         case Primitive::kPrimChar:
   2662           // Processing a Dex `int-to-float' instruction.
   2663           locations->SetInAt(0, Location::Any());
   2664           locations->SetOut(Location::RequiresFpuRegister());
   2665           break;
   2666 
   2667         case Primitive::kPrimLong:
   2668           // Processing a Dex `long-to-float' instruction.
   2669           locations->SetInAt(0, Location::Any());
   2670           locations->SetOut(Location::RequiresFpuRegister());
   2671           break;
   2672 
   2673         case Primitive::kPrimDouble:
   2674           // Processing a Dex `double-to-float' instruction.
   2675           locations->SetInAt(0, Location::Any());
   2676           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   2677           break;
   2678 
   2679         default:
   2680           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2681                      << " to " << result_type;
    2682       }
   2683       break;
   2684 
   2685     case Primitive::kPrimDouble:
   2686       switch (input_type) {
   2687         case Primitive::kPrimBoolean:
   2688           // Boolean input is a result of code transformations.
   2689         case Primitive::kPrimByte:
   2690         case Primitive::kPrimShort:
   2691         case Primitive::kPrimInt:
   2692         case Primitive::kPrimChar:
   2693           // Processing a Dex `int-to-double' instruction.
   2694           locations->SetInAt(0, Location::Any());
   2695           locations->SetOut(Location::RequiresFpuRegister());
   2696           break;
   2697 
   2698         case Primitive::kPrimLong:
   2699           // Processing a Dex `long-to-double' instruction.
   2700           locations->SetInAt(0, Location::Any());
   2701           locations->SetOut(Location::RequiresFpuRegister());
   2702           break;
   2703 
   2704         case Primitive::kPrimFloat:
   2705           // Processing a Dex `float-to-double' instruction.
   2706           locations->SetInAt(0, Location::Any());
   2707           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   2708           break;
   2709 
   2710         default:
   2711           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2712                      << " to " << result_type;
   2713       }
   2714       break;
   2715 
   2716     default:
   2717       LOG(FATAL) << "Unexpected type conversion from " << input_type
   2718                  << " to " << result_type;
   2719   }
   2720 }
   2721 
   2722 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
   2723   LocationSummary* locations = conversion->GetLocations();
   2724   Location out = locations->Out();
   2725   Location in = locations->InAt(0);
   2726   Primitive::Type result_type = conversion->GetResultType();
   2727   Primitive::Type input_type = conversion->GetInputType();
   2728   DCHECK_NE(result_type, input_type);
   2729   switch (result_type) {
   2730     case Primitive::kPrimByte:
   2731       switch (input_type) {
   2732         case Primitive::kPrimLong:
   2733           // Type conversion from long to byte is a result of code transformations.
   2734         case Primitive::kPrimBoolean:
   2735           // Boolean input is a result of code transformations.
   2736         case Primitive::kPrimShort:
   2737         case Primitive::kPrimInt:
   2738         case Primitive::kPrimChar:
   2739           // Processing a Dex `int-to-byte' instruction.
   2740           if (in.IsRegister()) {
   2741             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2742           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2743             __ movsxb(out.AsRegister<CpuRegister>(),
   2744                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2745           } else {
   2746             __ movl(out.AsRegister<CpuRegister>(),
   2747                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
   2748           }
   2749           break;
   2750 
   2751         default:
   2752           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2753                      << " to " << result_type;
   2754       }
   2755       break;
   2756 
   2757     case Primitive::kPrimShort:
   2758       switch (input_type) {
   2759         case Primitive::kPrimLong:
   2760           // Type conversion from long to short is a result of code transformations.
   2761         case Primitive::kPrimBoolean:
   2762           // Boolean input is a result of code transformations.
   2763         case Primitive::kPrimByte:
   2764         case Primitive::kPrimInt:
   2765         case Primitive::kPrimChar:
   2766           // Processing a Dex `int-to-short' instruction.
   2767           if (in.IsRegister()) {
   2768             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2769           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2770             __ movsxw(out.AsRegister<CpuRegister>(),
   2771                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2772           } else {
   2773             __ movl(out.AsRegister<CpuRegister>(),
   2774                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
   2775           }
   2776           break;
   2777 
   2778         default:
   2779           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2780                      << " to " << result_type;
   2781       }
   2782       break;
   2783 
   2784     case Primitive::kPrimInt:
   2785       switch (input_type) {
   2786         case Primitive::kPrimLong:
   2787           // Processing a Dex `long-to-int' instruction.
   2788           if (in.IsRegister()) {
   2789             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2790           } else if (in.IsDoubleStackSlot()) {
   2791             __ movl(out.AsRegister<CpuRegister>(),
   2792                     Address(CpuRegister(RSP), in.GetStackIndex()));
   2793           } else {
   2794             DCHECK(in.IsConstant());
   2795             DCHECK(in.GetConstant()->IsLongConstant());
   2796             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
   2797             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
   2798           }
   2799           break;
   2800 
   2801         case Primitive::kPrimFloat: {
   2802           // Processing a Dex `float-to-int' instruction.
   2803           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2804           CpuRegister output = out.AsRegister<CpuRegister>();
   2805           NearLabel done, nan;
   2806 
   2807           __ movl(output, Immediate(kPrimIntMax));
   2808           // if input >= (float)INT_MAX goto done
   2809           __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
   2810           __ j(kAboveEqual, &done);
   2811           // if input == NaN goto nan
   2812           __ j(kUnordered, &nan);
   2813           // output = float-to-int-truncate(input)
   2814           __ cvttss2si(output, input, false);
   2815           __ jmp(&done);
   2816           __ Bind(&nan);
   2817           //  output = 0
   2818           __ xorl(output, output);
   2819           __ Bind(&done);
   2820           break;
   2821         }
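
        // Illustrative note on the sequence above (and on the double/long
        // variants below): Java requires NaN -> 0 and saturation at the
        // integral type's range for narrowing FP conversions. The output is
        // preloaded with INT_MAX, a compare branches to `done` when the input
        // is >= (float)INT_MAX, an unordered compare (NaN) branches to `nan`
        // where the output is zeroed, and everything else is truncated with
        // cvttss2si. Inputs below INT_MIN need no explicit check: on
        // out-of-range values cvttss2si produces the x86 "integer indefinite"
        // value 0x80000000, which is already INT_MIN.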
   2822 
   2823         case Primitive::kPrimDouble: {
   2824           // Processing a Dex `double-to-int' instruction.
   2825           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2826           CpuRegister output = out.AsRegister<CpuRegister>();
   2827           NearLabel done, nan;
   2828 
   2829           __ movl(output, Immediate(kPrimIntMax));
   2830           // if input >= (double)INT_MAX goto done
   2831           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
   2832           __ j(kAboveEqual, &done);
   2833           // if input == NaN goto nan
   2834           __ j(kUnordered, &nan);
   2835           // output = double-to-int-truncate(input)
   2836           __ cvttsd2si(output, input);
   2837           __ jmp(&done);
   2838           __ Bind(&nan);
   2839           //  output = 0
   2840           __ xorl(output, output);
   2841           __ Bind(&done);
   2842           break;
   2843         }
   2844 
   2845         default:
   2846           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2847                      << " to " << result_type;
   2848       }
   2849       break;
   2850 
   2851     case Primitive::kPrimLong:
    2852       DCHECK(out.IsRegister());
    2853       switch (input_type) {
   2854         case Primitive::kPrimBoolean:
   2855           // Boolean input is a result of code transformations.
   2856         case Primitive::kPrimByte:
   2857         case Primitive::kPrimShort:
   2858         case Primitive::kPrimInt:
   2859         case Primitive::kPrimChar:
   2860           // Processing a Dex `int-to-long' instruction.
   2861           DCHECK(in.IsRegister());
   2862           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2863           break;
   2864 
   2865         case Primitive::kPrimFloat: {
   2866           // Processing a Dex `float-to-long' instruction.
   2867           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2868           CpuRegister output = out.AsRegister<CpuRegister>();
   2869           NearLabel done, nan;
   2870 
   2871           codegen_->Load64BitValue(output, kPrimLongMax);
   2872           // if input >= (float)LONG_MAX goto done
   2873           __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
   2874           __ j(kAboveEqual, &done);
   2875           // if input == NaN goto nan
   2876           __ j(kUnordered, &nan);
   2877           // output = float-to-long-truncate(input)
   2878           __ cvttss2si(output, input, true);
   2879           __ jmp(&done);
   2880           __ Bind(&nan);
   2881           //  output = 0
   2882           __ xorl(output, output);
   2883           __ Bind(&done);
   2884           break;
   2885         }
   2886 
   2887         case Primitive::kPrimDouble: {
   2888           // Processing a Dex `double-to-long' instruction.
   2889           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2890           CpuRegister output = out.AsRegister<CpuRegister>();
   2891           NearLabel done, nan;
   2892 
   2893           codegen_->Load64BitValue(output, kPrimLongMax);
   2894           // if input >= (double)LONG_MAX goto done
   2895           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
   2896           __ j(kAboveEqual, &done);
   2897           // if input == NaN goto nan
   2898           __ j(kUnordered, &nan);
   2899           // output = double-to-long-truncate(input)
   2900           __ cvttsd2si(output, input, true);
   2901           __ jmp(&done);
   2902           __ Bind(&nan);
   2903           //  output = 0
   2904           __ xorl(output, output);
   2905           __ Bind(&done);
   2906           break;
   2907         }
   2908 
   2909         default:
   2910           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2911                      << " to " << result_type;
   2912       }
   2913       break;
   2914 
   2915     case Primitive::kPrimChar:
   2916       switch (input_type) {
   2917         case Primitive::kPrimLong:
   2918           // Type conversion from long to char is a result of code transformations.
   2919         case Primitive::kPrimBoolean:
   2920           // Boolean input is a result of code transformations.
   2921         case Primitive::kPrimByte:
   2922         case Primitive::kPrimShort:
   2923         case Primitive::kPrimInt:
   2924           // Processing a Dex `int-to-char' instruction.
   2925           if (in.IsRegister()) {
   2926             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2927           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2928             __ movzxw(out.AsRegister<CpuRegister>(),
   2929                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2930           } else {
   2931             __ movl(out.AsRegister<CpuRegister>(),
   2932                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
   2933           }
   2934           break;
   2935 
   2936         default:
   2937           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2938                      << " to " << result_type;
   2939       }
   2940       break;
   2941 
   2942     case Primitive::kPrimFloat:
   2943       switch (input_type) {
   2944         case Primitive::kPrimBoolean:
   2945           // Boolean input is a result of code transformations.
   2946         case Primitive::kPrimByte:
   2947         case Primitive::kPrimShort:
   2948         case Primitive::kPrimInt:
   2949         case Primitive::kPrimChar:
   2950           // Processing a Dex `int-to-float' instruction.
   2951           if (in.IsRegister()) {
   2952             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
   2953           } else if (in.IsConstant()) {
   2954             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
   2955             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2956             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2957           } else {
   2958             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
   2959                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
   2960           }
   2961           break;
   2962 
   2963         case Primitive::kPrimLong:
   2964           // Processing a Dex `long-to-float' instruction.
   2965           if (in.IsRegister()) {
   2966             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
   2967           } else if (in.IsConstant()) {
   2968             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
   2969             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2970             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2971           } else {
   2972             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
   2973                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
   2974           }
   2975           break;
   2976 
   2977         case Primitive::kPrimDouble:
   2978           // Processing a Dex `double-to-float' instruction.
   2979           if (in.IsFpuRegister()) {
   2980             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
   2981           } else if (in.IsConstant()) {
   2982             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
   2983             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2984             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2985           } else {
   2986             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
   2987                         Address(CpuRegister(RSP), in.GetStackIndex()));
   2988           }
   2989           break;
   2990 
   2991         default:
   2992           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2993                      << " to " << result_type;
    2994       }
   2995       break;
   2996 
   2997     case Primitive::kPrimDouble:
   2998       switch (input_type) {
   2999         case Primitive::kPrimBoolean:
   3000           // Boolean input is a result of code transformations.
   3001         case Primitive::kPrimByte:
   3002         case Primitive::kPrimShort:
   3003         case Primitive::kPrimInt:
   3004         case Primitive::kPrimChar:
   3005           // Processing a Dex `int-to-double' instruction.
   3006           if (in.IsRegister()) {
   3007             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
   3008           } else if (in.IsConstant()) {
   3009             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
   3010             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   3011             codegen_->Load64BitValue(dest, static_cast<double>(v));
   3012           } else {
   3013             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
   3014                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
   3015           }
   3016           break;
   3017 
   3018         case Primitive::kPrimLong:
   3019           // Processing a Dex `long-to-double' instruction.
   3020           if (in.IsRegister()) {
   3021             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
   3022           } else if (in.IsConstant()) {
   3023             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
   3024             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   3025             codegen_->Load64BitValue(dest, static_cast<double>(v));
   3026           } else {
   3027             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
   3028                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
   3029           }
   3030           break;
   3031 
   3032         case Primitive::kPrimFloat:
   3033           // Processing a Dex `float-to-double' instruction.
   3034           if (in.IsFpuRegister()) {
   3035             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
   3036           } else if (in.IsConstant()) {
   3037             float v = in.GetConstant()->AsFloatConstant()->GetValue();
   3038             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   3039             codegen_->Load64BitValue(dest, static_cast<double>(v));
   3040           } else {
   3041             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
   3042                         Address(CpuRegister(RSP), in.GetStackIndex()));
   3043           }
   3044           break;
   3045 
   3046         default:
   3047           LOG(FATAL) << "Unexpected type conversion from " << input_type
   3048                      << " to " << result_type;
    3049       }
   3050       break;
   3051 
   3052     default:
   3053       LOG(FATAL) << "Unexpected type conversion from " << input_type
   3054                  << " to " << result_type;
   3055   }
   3056 }
   3057 
   3058 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
   3059   LocationSummary* locations =
   3060       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
   3061   switch (add->GetResultType()) {
   3062     case Primitive::kPrimInt: {
   3063       locations->SetInAt(0, Location::RequiresRegister());
   3064       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
   3065       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3066       break;
   3067     }
   3068 
   3069     case Primitive::kPrimLong: {
   3070       locations->SetInAt(0, Location::RequiresRegister());
   3071       // We can use a leaq or addq if the constant can fit in an immediate.
   3072       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
   3073       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3074       break;
   3075     }
   3076 
   3077     case Primitive::kPrimDouble:
   3078     case Primitive::kPrimFloat: {
   3079       locations->SetInAt(0, Location::RequiresFpuRegister());
   3080       locations->SetInAt(1, Location::Any());
   3081       locations->SetOut(Location::SameAsFirstInput());
   3082       break;
   3083     }
   3084 
   3085     default:
   3086       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   3087   }
   3088 }
   3089 
   3090 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
   3091   LocationSummary* locations = add->GetLocations();
   3092   Location first = locations->InAt(0);
   3093   Location second = locations->InAt(1);
   3094   Location out = locations->Out();
   3095 
   3096   switch (add->GetResultType()) {
   3097     case Primitive::kPrimInt: {
   3098       if (second.IsRegister()) {
   3099         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   3100           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3101         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
   3102           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
   3103         } else {
   3104           __ leal(out.AsRegister<CpuRegister>(), Address(
   3105               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
   3106         }
   3107       } else if (second.IsConstant()) {
   3108         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   3109           __ addl(out.AsRegister<CpuRegister>(),
   3110                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
   3111         } else {
   3112           __ leal(out.AsRegister<CpuRegister>(), Address(
   3113               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
   3114         }
   3115       } else {
   3116         DCHECK(first.Equals(locations->Out()));
   3117         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
   3118       }
   3119       break;
   3120     }
   3121 
   3122     case Primitive::kPrimLong: {
   3123       if (second.IsRegister()) {
   3124         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   3125           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3126         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
   3127           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
   3128         } else {
   3129           __ leaq(out.AsRegister<CpuRegister>(), Address(
   3130               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
   3131         }
   3132       } else {
   3133         DCHECK(second.IsConstant());
   3134         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
   3135         int32_t int32_value = Low32Bits(value);
   3136         DCHECK_EQ(int32_value, value);
   3137         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   3138           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
   3139         } else {
   3140           __ leaq(out.AsRegister<CpuRegister>(), Address(
   3141               first.AsRegister<CpuRegister>(), int32_value));
   3142         }
   3143       }
   3144       break;
   3145     }
   3146 
   3147     case Primitive::kPrimFloat: {
   3148       if (second.IsFpuRegister()) {
   3149         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3150       } else if (second.IsConstant()) {
   3151         __ addss(first.AsFpuRegister<XmmRegister>(),
   3152                  codegen_->LiteralFloatAddress(
   3153                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3154       } else {
   3155         DCHECK(second.IsStackSlot());
   3156         __ addss(first.AsFpuRegister<XmmRegister>(),
   3157                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3158       }
   3159       break;
   3160     }
   3161 
   3162     case Primitive::kPrimDouble: {
   3163       if (second.IsFpuRegister()) {
   3164         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3165       } else if (second.IsConstant()) {
   3166         __ addsd(first.AsFpuRegister<XmmRegister>(),
   3167                  codegen_->LiteralDoubleAddress(
   3168                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3169       } else {
   3170         DCHECK(second.IsDoubleStackSlot());
   3171         __ addsd(first.AsFpuRegister<XmmRegister>(),
   3172                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3173       }
   3174       break;
   3175     }
   3176 
   3177     default:
   3178       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   3179   }
   3180 }
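
// Illustrative sketch of the lea-based addition above: when the register
// allocator assigns the output a register distinct from both inputs, lea forms
// the sum without clobbering either input and without an extra mov
// (hypothetical register assignment shown):
//
//   leal edi, [eax + ecx*1]     // out = first + second, both inputs preserved
//   leal edi, [eax + 42]        // out = first + int32 constant
//
// This is what allows the int/long cases to request Location::kNoOutputOverlap
// rather than Location::SameAsFirstInput().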
   3181 
   3182 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
   3183   LocationSummary* locations =
   3184       new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
   3185   switch (sub->GetResultType()) {
   3186     case Primitive::kPrimInt: {
   3187       locations->SetInAt(0, Location::RequiresRegister());
   3188       locations->SetInAt(1, Location::Any());
   3189       locations->SetOut(Location::SameAsFirstInput());
   3190       break;
   3191     }
   3192     case Primitive::kPrimLong: {
   3193       locations->SetInAt(0, Location::RequiresRegister());
   3194       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
   3195       locations->SetOut(Location::SameAsFirstInput());
   3196       break;
   3197     }
   3198     case Primitive::kPrimFloat:
   3199     case Primitive::kPrimDouble: {
   3200       locations->SetInAt(0, Location::RequiresFpuRegister());
   3201       locations->SetInAt(1, Location::Any());
   3202       locations->SetOut(Location::SameAsFirstInput());
   3203       break;
   3204     }
   3205     default:
   3206       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   3207   }
   3208 }
   3209 
   3210 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
   3211   LocationSummary* locations = sub->GetLocations();
   3212   Location first = locations->InAt(0);
   3213   Location second = locations->InAt(1);
   3214   DCHECK(first.Equals(locations->Out()));
   3215   switch (sub->GetResultType()) {
   3216     case Primitive::kPrimInt: {
   3217       if (second.IsRegister()) {
   3218         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3219       } else if (second.IsConstant()) {
   3220         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
   3221         __ subl(first.AsRegister<CpuRegister>(), imm);
   3222       } else {
   3223         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
   3224       }
   3225       break;
   3226     }
   3227     case Primitive::kPrimLong: {
   3228       if (second.IsConstant()) {
   3229         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
   3230         DCHECK(IsInt<32>(value));
   3231         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
   3232       } else {
   3233         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3234       }
   3235       break;
   3236     }
   3237 
   3238     case Primitive::kPrimFloat: {
   3239       if (second.IsFpuRegister()) {
   3240         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3241       } else if (second.IsConstant()) {
   3242         __ subss(first.AsFpuRegister<XmmRegister>(),
   3243                  codegen_->LiteralFloatAddress(
   3244                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3245       } else {
   3246         DCHECK(second.IsStackSlot());
   3247         __ subss(first.AsFpuRegister<XmmRegister>(),
   3248                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3249       }
   3250       break;
   3251     }
   3252 
   3253     case Primitive::kPrimDouble: {
   3254       if (second.IsFpuRegister()) {
   3255         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3256       } else if (second.IsConstant()) {
   3257         __ subsd(first.AsFpuRegister<XmmRegister>(),
   3258                  codegen_->LiteralDoubleAddress(
   3259                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3260       } else {
   3261         DCHECK(second.IsDoubleStackSlot());
   3262         __ subsd(first.AsFpuRegister<XmmRegister>(),
   3263                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3264       }
   3265       break;
   3266     }
   3267 
   3268     default:
   3269       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   3270   }
   3271 }
   3272 
   3273 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
   3274   LocationSummary* locations =
   3275       new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
   3276   switch (mul->GetResultType()) {
   3277     case Primitive::kPrimInt: {
   3278       locations->SetInAt(0, Location::RequiresRegister());
   3279       locations->SetInAt(1, Location::Any());
   3280       if (mul->InputAt(1)->IsIntConstant()) {
   3281         // Can use 3 operand multiply.
   3282         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3283       } else {
   3284         locations->SetOut(Location::SameAsFirstInput());
   3285       }
   3286       break;
   3287     }
   3288     case Primitive::kPrimLong: {
   3289       locations->SetInAt(0, Location::RequiresRegister());
   3290       locations->SetInAt(1, Location::Any());
   3291       if (mul->InputAt(1)->IsLongConstant() &&
   3292           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
   3293         // Can use 3 operand multiply.
   3294         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3295       } else {
   3296         locations->SetOut(Location::SameAsFirstInput());
   3297       }
   3298       break;
   3299     }
   3300     case Primitive::kPrimFloat:
   3301     case Primitive::kPrimDouble: {
   3302       locations->SetInAt(0, Location::RequiresFpuRegister());
   3303       locations->SetInAt(1, Location::Any());
   3304       locations->SetOut(Location::SameAsFirstInput());
   3305       break;
   3306     }
   3307 
   3308     default:
   3309       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   3310   }
   3311 }
   3312 
   3313 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
   3314   LocationSummary* locations = mul->GetLocations();
   3315   Location first = locations->InAt(0);
   3316   Location second = locations->InAt(1);
   3317   Location out = locations->Out();
   3318   switch (mul->GetResultType()) {
   3319     case Primitive::kPrimInt:
   3320       // The constant may have ended up in a register, so test explicitly to avoid
   3321       // problems where the output may not be the same as the first operand.
   3322       if (mul->InputAt(1)->IsIntConstant()) {
   3323         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
   3324         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
   3325       } else if (second.IsRegister()) {
   3326         DCHECK(first.Equals(out));
   3327         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3328       } else {
   3329         DCHECK(first.Equals(out));
   3330         DCHECK(second.IsStackSlot());
   3331         __ imull(first.AsRegister<CpuRegister>(),
   3332                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3333       }
   3334       break;
   3335     case Primitive::kPrimLong: {
   3336       // The constant may have ended up in a register, so test explicitly to avoid
   3337       // problems where the output may not be the same as the first operand.
   3338       if (mul->InputAt(1)->IsLongConstant()) {
   3339         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
   3340         if (IsInt<32>(value)) {
   3341           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
   3342                    Immediate(static_cast<int32_t>(value)));
   3343         } else {
   3344           // Have to use the constant area.
   3345           DCHECK(first.Equals(out));
   3346           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
   3347         }
   3348       } else if (second.IsRegister()) {
   3349         DCHECK(first.Equals(out));
   3350         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3351       } else {
   3352         DCHECK(second.IsDoubleStackSlot());
   3353         DCHECK(first.Equals(out));
   3354         __ imulq(first.AsRegister<CpuRegister>(),
   3355                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3356       }
   3357       break;
   3358     }
   3359 
   3360     case Primitive::kPrimFloat: {
   3361       DCHECK(first.Equals(out));
   3362       if (second.IsFpuRegister()) {
   3363         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3364       } else if (second.IsConstant()) {
   3365         __ mulss(first.AsFpuRegister<XmmRegister>(),
   3366                  codegen_->LiteralFloatAddress(
   3367                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3368       } else {
   3369         DCHECK(second.IsStackSlot());
   3370         __ mulss(first.AsFpuRegister<XmmRegister>(),
   3371                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3372       }
   3373       break;
   3374     }
   3375 
   3376     case Primitive::kPrimDouble: {
   3377       DCHECK(first.Equals(out));
   3378       if (second.IsFpuRegister()) {
   3379         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3380       } else if (second.IsConstant()) {
   3381         __ mulsd(first.AsFpuRegister<XmmRegister>(),
   3382                  codegen_->LiteralDoubleAddress(
   3383                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3384       } else {
   3385         DCHECK(second.IsDoubleStackSlot());
   3386         __ mulsd(first.AsFpuRegister<XmmRegister>(),
   3387                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3388       }
   3389       break;
   3390     }
   3391 
   3392     default:
   3393       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   3394   }
   3395 }
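
// Illustrative note on the constant cases above: the three-operand form of
// imul multiplies a register by an immediate and writes the product to a
// possibly different destination, e.g. (hypothetical registers):
//
//   imull edi, eax, 10          // out = first * 10, first left untouched
//
// which is why the locations builder uses kNoOutputOverlap for constant
// multipliers. A long constant that does not fit in an int32 immediate is
// instead read from the constant area via LiteralInt64Address().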
   3396 
   3397 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
   3398                                                      uint32_t stack_adjustment, bool is_float) {
   3399   if (source.IsStackSlot()) {
   3400     DCHECK(is_float);
   3401     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
   3402   } else if (source.IsDoubleStackSlot()) {
   3403     DCHECK(!is_float);
   3404     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
   3405   } else {
   3406     // Write the value to the temporary location on the stack and load to FP stack.
   3407     if (is_float) {
   3408       Location stack_temp = Location::StackSlot(temp_offset);
   3409       codegen_->Move(stack_temp, source);
   3410       __ flds(Address(CpuRegister(RSP), temp_offset));
   3411     } else {
   3412       Location stack_temp = Location::DoubleStackSlot(temp_offset);
   3413       codegen_->Move(stack_temp, source);
   3414       __ fldl(Address(CpuRegister(RSP), temp_offset));
   3415     }
   3416   }
   3417 }
   3418 
   3419 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
   3420   Primitive::Type type = rem->GetResultType();
   3421   bool is_float = type == Primitive::kPrimFloat;
   3422   size_t elem_size = Primitive::ComponentSize(type);
   3423   LocationSummary* locations = rem->GetLocations();
   3424   Location first = locations->InAt(0);
   3425   Location second = locations->InAt(1);
   3426   Location out = locations->Out();
   3427 
   3428   // Create stack space for 2 elements.
   3429   // TODO: enhance register allocator to ask for stack temporaries.
   3430   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
   3431 
   3432   // Load the values to the FP stack in reverse order, using temporaries if needed.
   3433   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
   3434   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
   3435 
   3436   // Loop doing FPREM until we stabilize.
   3437   NearLabel retry;
   3438   __ Bind(&retry);
   3439   __ fprem();
   3440 
   3441   // Move FP status to AX.
   3442   __ fstsw();
   3443 
   3444   // And see if the argument reduction is complete. This is signaled by the
    3445   // C2 FPU flag bit being set to 0.
   3446   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
   3447   __ j(kNotEqual, &retry);
   3448 
   3449   // We have settled on the final value. Retrieve it into an XMM register.
   3450   // Store FP top of stack to real stack.
   3451   if (is_float) {
   3452     __ fsts(Address(CpuRegister(RSP), 0));
   3453   } else {
   3454     __ fstl(Address(CpuRegister(RSP), 0));
   3455   }
   3456 
   3457   // Pop the 2 items from the FP stack.
   3458   __ fucompp();
   3459 
   3460   // Load the value from the stack into an XMM register.
   3461   DCHECK(out.IsFpuRegister()) << out;
   3462   if (is_float) {
   3463     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
   3464   } else {
   3465     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
   3466   }
   3467 
   3468   // And remove the temporary stack space we allocated.
   3469   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
   3470 }
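
// Illustrative note on the fprem loop above: x87 fprem produces only a partial
// remainder (it reduces the exponent difference by at most 63 bits per
// iteration) and signals an incomplete reduction by setting the C2 bit
// (bit 10 of the FPU status word, i.e. kC2ConditionMask) in the status word
// stored to AX by fstsw. The emitted loop is therefore
//
//   retry: fprem
//          fstsw               // FPU status word -> AX
//          andl  eax, 0x400    // isolate C2
//          jne   retry         // C2 != 0 => keep reducing
//
// fprem truncates the quotient toward zero, which matches the semantics of
// Java's % on floats and doubles (fprem1, by contrast, would compute the IEEE
// round-to-nearest remainder).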
   3471 
   3472 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
   3473   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3474 
   3475   LocationSummary* locations = instruction->GetLocations();
   3476   Location second = locations->InAt(1);
   3477   DCHECK(second.IsConstant());
   3478 
   3479   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
   3480   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
   3481   int64_t imm = Int64FromConstant(second.GetConstant());
   3482 
   3483   DCHECK(imm == 1 || imm == -1);
   3484 
   3485   switch (instruction->GetResultType()) {
   3486     case Primitive::kPrimInt: {
   3487       if (instruction->IsRem()) {
   3488         __ xorl(output_register, output_register);
   3489       } else {
   3490         __ movl(output_register, input_register);
   3491         if (imm == -1) {
   3492           __ negl(output_register);
   3493         }
   3494       }
   3495       break;
   3496     }
   3497 
   3498     case Primitive::kPrimLong: {
   3499       if (instruction->IsRem()) {
   3500         __ xorl(output_register, output_register);
   3501       } else {
   3502         __ movq(output_register, input_register);
   3503         if (imm == -1) {
   3504           __ negq(output_register);
   3505         }
   3506       }
   3507       break;
   3508     }
   3509 
   3510     default:
   3511       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
   3512   }
   3513 }
   3514 
   3515 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   3516   LocationSummary* locations = instruction->GetLocations();
   3517   Location second = locations->InAt(1);
   3518 
   3519   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
   3520   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
   3521 
   3522   int64_t imm = Int64FromConstant(second.GetConstant());
   3523   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
   3524   uint64_t abs_imm = AbsOrMin(imm);
   3525 
   3526   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
   3527 
   3528   if (instruction->GetResultType() == Primitive::kPrimInt) {
   3529     __ leal(tmp, Address(numerator, abs_imm - 1));
   3530     __ testl(numerator, numerator);
   3531     __ cmov(kGreaterEqual, tmp, numerator);
   3532     int shift = CTZ(imm);
   3533     __ sarl(tmp, Immediate(shift));
   3534 
   3535     if (imm < 0) {
   3536       __ negl(tmp);
   3537     }
   3538 
   3539     __ movl(output_register, tmp);
   3540   } else {
   3541     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
   3542     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
   3543 
   3544     codegen_->Load64BitValue(rdx, abs_imm - 1);
   3545     __ addq(rdx, numerator);
   3546     __ testq(numerator, numerator);
   3547     __ cmov(kGreaterEqual, rdx, numerator);
   3548     int shift = CTZ(imm);
   3549     __ sarq(rdx, Immediate(shift));
   3550 
   3551     if (imm < 0) {
   3552       __ negq(rdx);
   3553     }
   3554 
   3555     __ movq(output_register, rdx);
   3556   }
   3557 }
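
// Illustrative worked example for the power-of-two division above, with
// imm == 4 (abs_imm == 4, shift == 2): an arithmetic shift alone rounds toward
// negative infinity, but Java division truncates toward zero, so abs_imm - 1
// is added first, and only for negative numerators (selected via cmov):
//
//   numerator =  7:  tmp =  7            ; sarl 2 ->  1    ( 7 / 4 ==  1)
//   numerator = -7:  tmp = -7 + 3 == -4  ; sarl 2 -> -1    (-7 / 4 == -1)
//
// For a negative divisor the result is negated at the end.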
   3558 
   3559 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
   3560   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3561 
   3562   LocationSummary* locations = instruction->GetLocations();
   3563   Location second = locations->InAt(1);
   3564 
   3565   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
   3566       : locations->GetTemp(0).AsRegister<CpuRegister>();
   3567   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
   3568   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
   3569       : locations->Out().AsRegister<CpuRegister>();
   3570   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   3571 
   3572   DCHECK_EQ(RAX, eax.AsRegister());
   3573   DCHECK_EQ(RDX, edx.AsRegister());
   3574   if (instruction->IsDiv()) {
   3575     DCHECK_EQ(RAX, out.AsRegister());
   3576   } else {
   3577     DCHECK_EQ(RDX, out.AsRegister());
   3578   }
   3579 
   3580   int64_t magic;
   3581   int shift;
   3582 
   3583   // TODO: can these branches be written as one?
   3584   if (instruction->GetResultType() == Primitive::kPrimInt) {
   3585     int imm = second.GetConstant()->AsIntConstant()->GetValue();
   3586 
   3587     CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
   3588 
   3589     __ movl(numerator, eax);
   3590 
   3591     __ movl(eax, Immediate(magic));
   3592     __ imull(numerator);
   3593 
   3594     if (imm > 0 && magic < 0) {
   3595       __ addl(edx, numerator);
   3596     } else if (imm < 0 && magic > 0) {
   3597       __ subl(edx, numerator);
   3598     }
   3599 
   3600     if (shift != 0) {
   3601       __ sarl(edx, Immediate(shift));
   3602     }
   3603 
   3604     __ movl(eax, edx);
   3605     __ shrl(edx, Immediate(31));
   3606     __ addl(edx, eax);
   3607 
   3608     if (instruction->IsRem()) {
   3609       __ movl(eax, numerator);
   3610       __ imull(edx, Immediate(imm));
   3611       __ subl(eax, edx);
   3612       __ movl(edx, eax);
   3613     } else {
   3614       __ movl(eax, edx);
   3615     }
   3616   } else {
   3617     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
   3618 
   3619     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
   3620 
   3621     CpuRegister rax = eax;
   3622     CpuRegister rdx = edx;
   3623 
   3624     CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
   3625 
   3626     // Save the numerator.
   3627     __ movq(numerator, rax);
   3628 
   3629     // RAX = magic
   3630     codegen_->Load64BitValue(rax, magic);
   3631 
   3632     // RDX:RAX = magic * numerator
   3633     __ imulq(numerator);
   3634 
   3635     if (imm > 0 && magic < 0) {
   3636       // RDX += numerator
   3637       __ addq(rdx, numerator);
   3638     } else if (imm < 0 && magic > 0) {
   3639       // RDX -= numerator
   3640       __ subq(rdx, numerator);
   3641     }
   3642 
   3643     // Shift if needed.
   3644     if (shift != 0) {
   3645       __ sarq(rdx, Immediate(shift));
   3646     }
   3647 
   3648     // RDX += 1 if RDX < 0
   3649     __ movq(rax, rdx);
   3650     __ shrq(rdx, Immediate(63));
   3651     __ addq(rdx, rax);
   3652 
   3653     if (instruction->IsRem()) {
   3654       __ movq(rax, numerator);
   3655 
   3656       if (IsInt<32>(imm)) {
   3657         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
   3658       } else {
   3659         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
   3660       }
   3661 
   3662       __ subq(rax, rdx);
   3663       __ movq(rdx, rax);
   3664     } else {
   3665       __ movq(rax, rdx);
   3666     }
   3667   }
   3668 }
   3669 
   3670 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   3671   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3672   Primitive::Type type = instruction->GetResultType();
   3673   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
   3674 
   3675   bool is_div = instruction->IsDiv();
   3676   LocationSummary* locations = instruction->GetLocations();
   3677 
   3678   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   3679   Location second = locations->InAt(1);
   3680 
   3681   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
   3682   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
   3683 
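           // Constant divisors never reach the hardware idiv below: +/-1 and (for div only) powers
           // of two get dedicated sequences, and every other constant uses the generic magic-number
           // sequence. Non-constant divisors use idiv, which needs a slow path because
           // INT_MIN / -1 (or LONG_MIN / -1) raises an arithmetic exception on x86-64.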
   3684   if (second.IsConstant()) {
   3685     int64_t imm = Int64FromConstant(second.GetConstant());
   3686 
   3687     if (imm == 0) {
    3688       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
   3689     } else if (imm == 1 || imm == -1) {
   3690       DivRemOneOrMinusOne(instruction);
   3691     } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
   3692       DivByPowerOfTwo(instruction->AsDiv());
   3693     } else {
   3694       DCHECK(imm <= -2 || imm >= 2);
   3695       GenerateDivRemWithAnyConstant(instruction);
   3696     }
   3697   } else {
   3698     SlowPathCode* slow_path =
   3699         new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
   3700             instruction, out.AsRegister(), type, is_div);
   3701     codegen_->AddSlowPath(slow_path);
   3702 
   3703     CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3704     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
    3705     // Dividing by -1 is actually negation, and -0x80000000(00000000) = 0x80000000(00000000),
   3706     // so it's safe to just use negl instead of more complex comparisons.
   3707     if (type == Primitive::kPrimInt) {
   3708       __ cmpl(second_reg, Immediate(-1));
   3709       __ j(kEqual, slow_path->GetEntryLabel());
    3710       // edx:eax <- sign extension of eax
   3711       __ cdq();
   3712       // eax = quotient, edx = remainder
   3713       __ idivl(second_reg);
   3714     } else {
   3715       __ cmpq(second_reg, Immediate(-1));
   3716       __ j(kEqual, slow_path->GetEntryLabel());
    3717       // rdx:rax <- sign extension of rax
   3718       __ cqo();
   3719       // rax = quotient, rdx = remainder
   3720       __ idivq(second_reg);
   3721     }
   3722     __ Bind(slow_path->GetExitLabel());
   3723   }
   3724 }
   3725 
   3726 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
   3727   LocationSummary* locations =
   3728       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
   3729   switch (div->GetResultType()) {
   3730     case Primitive::kPrimInt:
   3731     case Primitive::kPrimLong: {
   3732       locations->SetInAt(0, Location::RegisterLocation(RAX));
   3733       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
   3734       locations->SetOut(Location::SameAsFirstInput());
   3735       // Intel uses edx:eax as the dividend.
   3736       locations->AddTemp(Location::RegisterLocation(RDX));
    3737       // For a constant divisor we need to save the numerator while RAX and RDX get clobbered.
    3738       // Since imul forces its results into RAX and RDX (RDX is already reserved as a temp
    3739       // above), request one extra temp to hold the numerator.
   3740       if (div->InputAt(1)->IsConstant()) {
   3741         locations->AddTemp(Location::RequiresRegister());
   3742       }
   3743       break;
   3744     }
   3745 
   3746     case Primitive::kPrimFloat:
   3747     case Primitive::kPrimDouble: {
   3748       locations->SetInAt(0, Location::RequiresFpuRegister());
   3749       locations->SetInAt(1, Location::Any());
   3750       locations->SetOut(Location::SameAsFirstInput());
   3751       break;
   3752     }
   3753 
   3754     default:
   3755       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   3756   }
   3757 }
   3758 
   3759 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
   3760   LocationSummary* locations = div->GetLocations();
   3761   Location first = locations->InAt(0);
   3762   Location second = locations->InAt(1);
   3763   DCHECK(first.Equals(locations->Out()));
   3764 
   3765   Primitive::Type type = div->GetResultType();
   3766   switch (type) {
   3767     case Primitive::kPrimInt:
   3768     case Primitive::kPrimLong: {
   3769       GenerateDivRemIntegral(div);
   3770       break;
   3771     }
   3772 
   3773     case Primitive::kPrimFloat: {
   3774       if (second.IsFpuRegister()) {
   3775         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3776       } else if (second.IsConstant()) {
   3777         __ divss(first.AsFpuRegister<XmmRegister>(),
   3778                  codegen_->LiteralFloatAddress(
   3779                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3780       } else {
   3781         DCHECK(second.IsStackSlot());
   3782         __ divss(first.AsFpuRegister<XmmRegister>(),
   3783                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3784       }
   3785       break;
   3786     }
   3787 
   3788     case Primitive::kPrimDouble: {
   3789       if (second.IsFpuRegister()) {
   3790         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3791       } else if (second.IsConstant()) {
   3792         __ divsd(first.AsFpuRegister<XmmRegister>(),
   3793                  codegen_->LiteralDoubleAddress(
   3794                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3795       } else {
   3796         DCHECK(second.IsDoubleStackSlot());
   3797         __ divsd(first.AsFpuRegister<XmmRegister>(),
   3798                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3799       }
   3800       break;
   3801     }
   3802 
   3803     default:
   3804       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   3805   }
   3806 }
   3807 
   3808 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
   3809   Primitive::Type type = rem->GetResultType();
   3810   LocationSummary* locations =
   3811     new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
   3812 
   3813   switch (type) {
   3814     case Primitive::kPrimInt:
   3815     case Primitive::kPrimLong: {
   3816       locations->SetInAt(0, Location::RegisterLocation(RAX));
   3817       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
   3818       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
   3819       locations->SetOut(Location::RegisterLocation(RDX));
    3820       // For a constant divisor we need to save the numerator while RAX and RDX get clobbered.
    3821       // Since imul forces its results into RAX and RDX, and RDX is already the output, only
    3822       // one extra temp is needed to hold the numerator.
   3823       if (rem->InputAt(1)->IsConstant()) {
   3824         locations->AddTemp(Location::RequiresRegister());
   3825       }
   3826       break;
   3827     }
   3828 
   3829     case Primitive::kPrimFloat:
   3830     case Primitive::kPrimDouble: {
   3831       locations->SetInAt(0, Location::Any());
   3832       locations->SetInAt(1, Location::Any());
   3833       locations->SetOut(Location::RequiresFpuRegister());
   3834       locations->AddTemp(Location::RegisterLocation(RAX));
   3835       break;
   3836     }
   3837 
   3838     default:
   3839       LOG(FATAL) << "Unexpected rem type " << type;
   3840   }
   3841 }
   3842 
   3843 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
   3844   Primitive::Type type = rem->GetResultType();
   3845   switch (type) {
   3846     case Primitive::kPrimInt:
   3847     case Primitive::kPrimLong: {
   3848       GenerateDivRemIntegral(rem);
   3849       break;
   3850     }
   3851     case Primitive::kPrimFloat:
   3852     case Primitive::kPrimDouble: {
   3853       GenerateRemFP(rem);
   3854       break;
   3855     }
   3856     default:
   3857       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
   3858   }
   3859 }
   3860 
   3861 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3862   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   3863   locations->SetInAt(0, Location::Any());
   3864 }
   3865 
   3866 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3867   SlowPathCode* slow_path =
   3868       new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
   3869   codegen_->AddSlowPath(slow_path);
   3870 
   3871   LocationSummary* locations = instruction->GetLocations();
   3872   Location value = locations->InAt(0);
   3873 
   3874   switch (instruction->GetType()) {
   3875     case Primitive::kPrimBoolean:
   3876     case Primitive::kPrimByte:
   3877     case Primitive::kPrimChar:
   3878     case Primitive::kPrimShort:
   3879     case Primitive::kPrimInt: {
   3880       if (value.IsRegister()) {
   3881         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
   3882         __ j(kEqual, slow_path->GetEntryLabel());
   3883       } else if (value.IsStackSlot()) {
   3884         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
   3885         __ j(kEqual, slow_path->GetEntryLabel());
   3886       } else {
   3887         DCHECK(value.IsConstant()) << value;
   3888         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
   3889           __ jmp(slow_path->GetEntryLabel());
   3890         }
   3891       }
   3892       break;
   3893     }
   3894     case Primitive::kPrimLong: {
   3895       if (value.IsRegister()) {
   3896         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
   3897         __ j(kEqual, slow_path->GetEntryLabel());
   3898       } else if (value.IsDoubleStackSlot()) {
   3899         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
   3900         __ j(kEqual, slow_path->GetEntryLabel());
   3901       } else {
   3902         DCHECK(value.IsConstant()) << value;
   3903         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
   3904           __ jmp(slow_path->GetEntryLabel());
   3905         }
   3906       }
   3907       break;
   3908     }
   3909     default:
   3910       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
   3911   }
   3912 }
   3913 
   3914 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
   3915   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
   3916 
   3917   LocationSummary* locations =
   3918       new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
   3919 
   3920   switch (op->GetResultType()) {
   3921     case Primitive::kPrimInt:
   3922     case Primitive::kPrimLong: {
   3923       locations->SetInAt(0, Location::RequiresRegister());
   3924       // The shift count needs to be in CL.
   3925       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
   3926       locations->SetOut(Location::SameAsFirstInput());
   3927       break;
   3928     }
   3929     default:
   3930       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
   3931   }
   3932 }
   3933 
   3934 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
   3935   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
   3936 
   3937   LocationSummary* locations = op->GetLocations();
   3938   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
   3939   Location second = locations->InAt(1);
   3940 
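           // Java shift semantics use only the low 5 bits (int) or low 6 bits (long) of the shift
           // count. Variable shifts by CL get that masking from the hardware; constant counts are
           // masked explicitly below via kMaxIntShiftDistance / kMaxLongShiftDistance.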
   3941   switch (op->GetResultType()) {
   3942     case Primitive::kPrimInt: {
   3943       if (second.IsRegister()) {
   3944         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3945         if (op->IsShl()) {
   3946           __ shll(first_reg, second_reg);
   3947         } else if (op->IsShr()) {
   3948           __ sarl(first_reg, second_reg);
   3949         } else {
   3950           __ shrl(first_reg, second_reg);
   3951         }
   3952       } else {
   3953         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
   3954         if (op->IsShl()) {
   3955           __ shll(first_reg, imm);
   3956         } else if (op->IsShr()) {
   3957           __ sarl(first_reg, imm);
   3958         } else {
   3959           __ shrl(first_reg, imm);
   3960         }
   3961       }
   3962       break;
   3963     }
   3964     case Primitive::kPrimLong: {
   3965       if (second.IsRegister()) {
   3966         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3967         if (op->IsShl()) {
   3968           __ shlq(first_reg, second_reg);
   3969         } else if (op->IsShr()) {
   3970           __ sarq(first_reg, second_reg);
   3971         } else {
   3972           __ shrq(first_reg, second_reg);
   3973         }
   3974       } else {
   3975         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
   3976         if (op->IsShl()) {
   3977           __ shlq(first_reg, imm);
   3978         } else if (op->IsShr()) {
   3979           __ sarq(first_reg, imm);
   3980         } else {
   3981           __ shrq(first_reg, imm);
   3982         }
   3983       }
   3984       break;
   3985     }
   3986     default:
   3987       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
   3988       UNREACHABLE();
   3989   }
   3990 }
   3991 
   3992 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
   3993   LocationSummary* locations =
   3994       new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
   3995 
   3996   switch (ror->GetResultType()) {
   3997     case Primitive::kPrimInt:
   3998     case Primitive::kPrimLong: {
   3999       locations->SetInAt(0, Location::RequiresRegister());
   4000       // The shift count needs to be in CL (unless it is a constant).
   4001       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
   4002       locations->SetOut(Location::SameAsFirstInput());
   4003       break;
   4004     }
   4005     default:
   4006       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
   4007       UNREACHABLE();
   4008   }
   4009 }
   4010 
   4011 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
   4012   LocationSummary* locations = ror->GetLocations();
   4013   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
   4014   Location second = locations->InAt(1);
   4015 
   4016   switch (ror->GetResultType()) {
   4017     case Primitive::kPrimInt:
   4018       if (second.IsRegister()) {
   4019         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   4020         __ rorl(first_reg, second_reg);
   4021       } else {
   4022         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
   4023         __ rorl(first_reg, imm);
   4024       }
   4025       break;
   4026     case Primitive::kPrimLong:
   4027       if (second.IsRegister()) {
   4028         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   4029         __ rorq(first_reg, second_reg);
   4030       } else {
   4031         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
   4032         __ rorq(first_reg, imm);
   4033       }
   4034       break;
   4035     default:
   4036       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
   4037       UNREACHABLE();
   4038   }
   4039 }
   4040 
   4041 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
   4042   HandleShift(shl);
   4043 }
   4044 
   4045 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
   4046   HandleShift(shl);
   4047 }
   4048 
   4049 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
   4050   HandleShift(shr);
   4051 }
   4052 
   4053 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
   4054   HandleShift(shr);
   4055 }
   4056 
   4057 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
   4058   HandleShift(ushr);
   4059 }
   4060 
   4061 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
   4062   HandleShift(ushr);
   4063 }
   4064 
   4065 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   4066   LocationSummary* locations =
   4067       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   4068   InvokeRuntimeCallingConvention calling_convention;
   4069   if (instruction->IsStringAlloc()) {
   4070     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
   4071   } else {
   4072     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   4073   }
   4074   locations->SetOut(Location::RegisterLocation(RAX));
   4075 }
   4076 
   4077 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
    4078   // Note: if heap poisoning is enabled, the entry point takes care
   4079   // of poisoning the reference.
   4080   if (instruction->IsStringAlloc()) {
   4081     // String is allocated through StringFactory. Call NewEmptyString entry point.
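             // The pNewEmptyString Thread entry point is loaded via the gs: thread register and
             // treated as an ArtMethod*, whose quick-compiled code is then called directly; this
             // bypasses InvokeRuntime, so the PC info has to be recorded manually below.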
   4082     CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   4083     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
   4084     __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
   4085     __ call(Address(temp, code_offset.SizeValue()));
   4086     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
   4087   } else {
   4088     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
   4089     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   4090     DCHECK(!codegen_->IsLeafMethod());
   4091   }
   4092 }
   4093 
   4094 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
   4095   LocationSummary* locations =
   4096       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   4097   InvokeRuntimeCallingConvention calling_convention;
   4098   locations->SetOut(Location::RegisterLocation(RAX));
   4099   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   4100   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   4101 }
   4102 
   4103 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
    4104   // Note: if heap poisoning is enabled, the entry point takes care
   4105   // of poisoning the reference.
   4106   QuickEntrypointEnum entrypoint =
   4107       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
   4108   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   4109   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
   4110   DCHECK(!codegen_->IsLeafMethod());
   4111 }
   4112 
   4113 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
   4114   LocationSummary* locations =
   4115       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   4116   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
   4117   if (location.IsStackSlot()) {
   4118     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   4119   } else if (location.IsDoubleStackSlot()) {
   4120     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   4121   }
   4122   locations->SetOut(location);
   4123 }
   4124 
   4125 void InstructionCodeGeneratorX86_64::VisitParameterValue(
   4126     HParameterValue* instruction ATTRIBUTE_UNUSED) {
   4127   // Nothing to do, the parameter is already at its location.
   4128 }
   4129 
   4130 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
   4131   LocationSummary* locations =
   4132       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   4133   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
   4134 }
   4135 
   4136 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
   4137     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
   4138   // Nothing to do, the method is already at its location.
   4139 }
   4140 
   4141 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   4142   LocationSummary* locations =
   4143       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   4144   locations->SetInAt(0, Location::RequiresRegister());
   4145   locations->SetOut(Location::RequiresRegister());
   4146 }
   4147 
   4148 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   4149   LocationSummary* locations = instruction->GetLocations();
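           // A vtable lookup loads the ArtMethod* straight from the class' embedded vtable; an
           // IMT lookup needs an extra indirection through the class' ImTable pointer before
           // indexing into the table.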
   4150   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
   4151     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
   4152         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
   4153     __ movq(locations->Out().AsRegister<CpuRegister>(),
   4154             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
   4155   } else {
   4156     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   4157         instruction->GetIndex(), kX86_64PointerSize));
   4158     __ movq(locations->Out().AsRegister<CpuRegister>(),
   4159             Address(locations->InAt(0).AsRegister<CpuRegister>(),
   4160             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
   4161     __ movq(locations->Out().AsRegister<CpuRegister>(),
   4162             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
   4163   }
   4164 }
   4165 
   4166 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
   4167   LocationSummary* locations =
   4168       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
   4169   locations->SetInAt(0, Location::RequiresRegister());
   4170   locations->SetOut(Location::SameAsFirstInput());
   4171 }
   4172 
   4173 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
   4174   LocationSummary* locations = not_->GetLocations();
   4175   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
   4176             locations->Out().AsRegister<CpuRegister>().AsRegister());
   4177   Location out = locations->Out();
   4178   switch (not_->GetResultType()) {
   4179     case Primitive::kPrimInt:
   4180       __ notl(out.AsRegister<CpuRegister>());
   4181       break;
   4182 
   4183     case Primitive::kPrimLong:
   4184       __ notq(out.AsRegister<CpuRegister>());
   4185       break;
   4186 
   4187     default:
   4188       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
   4189   }
   4190 }
   4191 
   4192 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
   4193   LocationSummary* locations =
   4194       new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
   4195   locations->SetInAt(0, Location::RequiresRegister());
   4196   locations->SetOut(Location::SameAsFirstInput());
   4197 }
   4198 
   4199 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
   4200   LocationSummary* locations = bool_not->GetLocations();
   4201   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
   4202             locations->Out().AsRegister<CpuRegister>().AsRegister());
   4203   Location out = locations->Out();
   4204   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
   4205 }
   4206 
   4207 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
   4208   LocationSummary* locations =
   4209       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   4210   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
   4211     locations->SetInAt(i, Location::Any());
   4212   }
   4213   locations->SetOut(Location::Any());
   4214 }
   4215 
   4216 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   4217   LOG(FATAL) << "Unimplemented";
   4218 }
   4219 
   4220 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
   4221   /*
    4222    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
   4223    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
   4224    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   4225    */
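           // Note: kAnyAny is the only kind that needs an actual fence here. MemoryFence() is
           // expected to emit either an mfence or a lock-prefixed add of zero to the top of the
           // stack depending on the ISA features; the non-temporal variant below forces the
           // mfence form.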
   4226   switch (kind) {
   4227     case MemBarrierKind::kAnyAny: {
   4228       MemoryFence();
   4229       break;
   4230     }
   4231     case MemBarrierKind::kAnyStore:
   4232     case MemBarrierKind::kLoadAny:
   4233     case MemBarrierKind::kStoreStore: {
   4234       // nop
   4235       break;
   4236     }
   4237     case MemBarrierKind::kNTStoreStore:
   4238       // Non-Temporal Store/Store needs an explicit fence.
   4239       MemoryFence(/* non-temporal */ true);
   4240       break;
   4241   }
   4242 }
   4243 
   4244 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
   4245   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
   4246 
   4247   bool object_field_get_with_read_barrier =
   4248       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   4249   LocationSummary* locations =
   4250       new (GetGraph()->GetArena()) LocationSummary(instruction,
   4251                                                    object_field_get_with_read_barrier ?
   4252                                                        LocationSummary::kCallOnSlowPath :
   4253                                                        LocationSummary::kNoCall);
   4254   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
   4255     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   4256   }
   4257   locations->SetInAt(0, Location::RequiresRegister());
   4258   if (Primitive::IsFloatingPointType(instruction->GetType())) {
   4259     locations->SetOut(Location::RequiresFpuRegister());
   4260   } else {
   4261     // The output overlaps for an object field get when read barriers
   4262     // are enabled: we do not want the move to overwrite the object's
   4263     // location, as we need it to emit the read barrier.
   4264     locations->SetOut(
   4265         Location::RequiresRegister(),
   4266         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   4267   }
   4268 }
   4269 
   4270 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
   4271                                                     const FieldInfo& field_info) {
   4272   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
   4273 
   4274   LocationSummary* locations = instruction->GetLocations();
   4275   Location base_loc = locations->InAt(0);
   4276   CpuRegister base = base_loc.AsRegister<CpuRegister>();
   4277   Location out = locations->Out();
   4278   bool is_volatile = field_info.IsVolatile();
   4279   Primitive::Type field_type = field_info.GetFieldType();
   4280   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   4281 
   4282   switch (field_type) {
   4283     case Primitive::kPrimBoolean: {
   4284       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
   4285       break;
   4286     }
   4287 
   4288     case Primitive::kPrimByte: {
   4289       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
   4290       break;
   4291     }
   4292 
   4293     case Primitive::kPrimShort: {
   4294       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
   4295       break;
   4296     }
   4297 
   4298     case Primitive::kPrimChar: {
   4299       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
   4300       break;
   4301     }
   4302 
   4303     case Primitive::kPrimInt: {
   4304       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
   4305       break;
   4306     }
   4307 
   4308     case Primitive::kPrimNot: {
   4309       // /* HeapReference<Object> */ out = *(base + offset)
   4310       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   4311         // Note that a potential implicit null check is handled in this
   4312         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
   4313         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   4314             instruction, out, base, offset, /* needs_null_check */ true);
   4315         if (is_volatile) {
   4316           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4317         }
   4318       } else {
   4319         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
   4320         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4321         if (is_volatile) {
   4322           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4323         }
   4324         // If read barriers are enabled, emit read barriers other than
   4325         // Baker's using a slow path (and also unpoison the loaded
   4326         // reference, if heap poisoning is enabled).
   4327         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
   4328       }
   4329       break;
   4330     }
   4331 
   4332     case Primitive::kPrimLong: {
   4333       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
   4334       break;
   4335     }
   4336 
   4337     case Primitive::kPrimFloat: {
   4338       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
   4339       break;
   4340     }
   4341 
   4342     case Primitive::kPrimDouble: {
   4343       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
   4344       break;
   4345     }
   4346 
   4347     case Primitive::kPrimVoid:
   4348       LOG(FATAL) << "Unreachable type " << field_type;
   4349       UNREACHABLE();
   4350   }
   4351 
   4352   if (field_type == Primitive::kPrimNot) {
   4353     // Potential implicit null checks, in the case of reference
   4354     // fields, are handled in the previous switch statement.
   4355   } else {
   4356     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4357   }
   4358 
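           // For a volatile load a LoadAny barrier is sufficient; on x86-64 it emits no fence
           // (see GenerateMemoryBarrier above) and only acts as a compiler scheduling barrier.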
   4359   if (is_volatile) {
   4360     if (field_type == Primitive::kPrimNot) {
   4361       // Memory barriers, in the case of references, are also handled
   4362       // in the previous switch statement.
   4363     } else {
   4364       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4365     }
   4366   }
   4367 }
   4368 
   4369 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
   4370                                             const FieldInfo& field_info) {
   4371   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
   4372 
   4373   LocationSummary* locations =
   4374       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   4375   Primitive::Type field_type = field_info.GetFieldType();
   4376   bool is_volatile = field_info.IsVolatile();
   4377   bool needs_write_barrier =
   4378       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
   4379 
   4380   locations->SetInAt(0, Location::RequiresRegister());
   4381   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
   4382     if (is_volatile) {
   4383       // In order to satisfy the semantics of volatile, this must be a single instruction store.
   4384       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
   4385     } else {
   4386       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
   4387     }
   4388   } else {
   4389     if (is_volatile) {
   4390       // In order to satisfy the semantics of volatile, this must be a single instruction store.
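               // Restricting constants to the int32 range means the store can stay a single
               // instruction; a constant that does not fit in 32 bits would have to be stored as
               // two 32-bit halves (see MoveInt64ToAddress below), which is not atomic.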
   4391       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
   4392     } else {
   4393       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4394     }
   4395   }
   4396   if (needs_write_barrier) {
   4397     // Temporary registers for the write barrier.
   4398     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
   4399     locations->AddTemp(Location::RequiresRegister());
   4400   } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
   4401     // Temporary register for the reference poisoning.
   4402     locations->AddTemp(Location::RequiresRegister());
   4403   }
   4404 }
   4405 
   4406 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
   4407                                                     const FieldInfo& field_info,
   4408                                                     bool value_can_be_null) {
   4409   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
   4410 
   4411   LocationSummary* locations = instruction->GetLocations();
   4412   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
   4413   Location value = locations->InAt(1);
   4414   bool is_volatile = field_info.IsVolatile();
   4415   Primitive::Type field_type = field_info.GetFieldType();
   4416   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   4417 
   4418   if (is_volatile) {
   4419     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   4420   }
   4421 
   4422   bool maybe_record_implicit_null_check_done = false;
   4423 
   4424   switch (field_type) {
   4425     case Primitive::kPrimBoolean:
   4426     case Primitive::kPrimByte: {
   4427       if (value.IsConstant()) {
   4428         int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4429         __ movb(Address(base, offset), Immediate(v));
   4430       } else {
   4431         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
   4432       }
   4433       break;
   4434     }
   4435 
   4436     case Primitive::kPrimShort:
   4437     case Primitive::kPrimChar: {
   4438       if (value.IsConstant()) {
   4439         int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4440         __ movw(Address(base, offset), Immediate(v));
   4441       } else {
   4442         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
   4443       }
   4444       break;
   4445     }
   4446 
   4447     case Primitive::kPrimInt:
   4448     case Primitive::kPrimNot: {
   4449       if (value.IsConstant()) {
   4450         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4451         // `field_type == Primitive::kPrimNot` implies `v == 0`.
   4452         DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
   4453         // Note: if heap poisoning is enabled, no need to poison
   4454         // (negate) `v` if it is a reference, as it would be null.
   4455         __ movl(Address(base, offset), Immediate(v));
   4456       } else {
   4457         if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
   4458           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   4459           __ movl(temp, value.AsRegister<CpuRegister>());
   4460           __ PoisonHeapReference(temp);
   4461           __ movl(Address(base, offset), temp);
   4462         } else {
   4463           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
   4464         }
   4465       }
   4466       break;
   4467     }
   4468 
   4469     case Primitive::kPrimLong: {
   4470       if (value.IsConstant()) {
   4471         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
   4472         codegen_->MoveInt64ToAddress(Address(base, offset),
   4473                                      Address(base, offset + sizeof(int32_t)),
   4474                                      v,
   4475                                      instruction);
   4476         maybe_record_implicit_null_check_done = true;
   4477       } else {
   4478         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
   4479       }
   4480       break;
   4481     }
   4482 
   4483     case Primitive::kPrimFloat: {
   4484       if (value.IsConstant()) {
   4485         int32_t v =
   4486             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
   4487         __ movl(Address(base, offset), Immediate(v));
   4488       } else {
   4489         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
   4490       }
   4491       break;
   4492     }
   4493 
   4494     case Primitive::kPrimDouble: {
   4495       if (value.IsConstant()) {
   4496         int64_t v =
   4497             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
   4498         codegen_->MoveInt64ToAddress(Address(base, offset),
   4499                                      Address(base, offset + sizeof(int32_t)),
   4500                                      v,
   4501                                      instruction);
   4502         maybe_record_implicit_null_check_done = true;
   4503       } else {
   4504         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
   4505       }
   4506       break;
   4507     }
   4508 
   4509     case Primitive::kPrimVoid:
   4510       LOG(FATAL) << "Unreachable type " << field_type;
   4511       UNREACHABLE();
   4512   }
   4513 
   4514   if (!maybe_record_implicit_null_check_done) {
   4515     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4516   }
   4517 
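           // A reference store needs a card-marking write barrier so the GC can find the updated
           // field without scanning the whole heap: MarkGCCard dirties the card-table entry that
           // covers `base`.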
   4518   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
   4519     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   4520     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
   4521     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
   4522   }
   4523 
   4524   if (is_volatile) {
   4525     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   4526   }
   4527 }
   4528 
   4529 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   4530   HandleFieldSet(instruction, instruction->GetFieldInfo());
   4531 }
   4532 
   4533 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   4534   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   4535 }
   4536 
   4537 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   4538   HandleFieldGet(instruction);
   4539 }
   4540 
   4541 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   4542   HandleFieldGet(instruction, instruction->GetFieldInfo());
   4543 }
   4544 
   4545 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   4546   HandleFieldGet(instruction);
   4547 }
   4548 
   4549 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   4550   HandleFieldGet(instruction, instruction->GetFieldInfo());
   4551 }
   4552 
   4553 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   4554   HandleFieldSet(instruction, instruction->GetFieldInfo());
   4555 }
   4556 
   4557 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   4558   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   4559 }
   4560 
   4561 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
   4562     HUnresolvedInstanceFieldGet* instruction) {
   4563   FieldAccessCallingConventionX86_64 calling_convention;
   4564   codegen_->CreateUnresolvedFieldLocationSummary(
   4565       instruction, instruction->GetFieldType(), calling_convention);
   4566 }
   4567 
   4568 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
   4569     HUnresolvedInstanceFieldGet* instruction) {
   4570   FieldAccessCallingConventionX86_64 calling_convention;
   4571   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4572                                           instruction->GetFieldType(),
   4573                                           instruction->GetFieldIndex(),
   4574                                           instruction->GetDexPc(),
   4575                                           calling_convention);
   4576 }
   4577 
   4578 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
   4579     HUnresolvedInstanceFieldSet* instruction) {
   4580   FieldAccessCallingConventionX86_64 calling_convention;
   4581   codegen_->CreateUnresolvedFieldLocationSummary(
   4582       instruction, instruction->GetFieldType(), calling_convention);
   4583 }
   4584 
   4585 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
   4586     HUnresolvedInstanceFieldSet* instruction) {
   4587   FieldAccessCallingConventionX86_64 calling_convention;
   4588   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4589                                           instruction->GetFieldType(),
   4590                                           instruction->GetFieldIndex(),
   4591                                           instruction->GetDexPc(),
   4592                                           calling_convention);
   4593 }
   4594 
   4595 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
   4596     HUnresolvedStaticFieldGet* instruction) {
   4597   FieldAccessCallingConventionX86_64 calling_convention;
   4598   codegen_->CreateUnresolvedFieldLocationSummary(
   4599       instruction, instruction->GetFieldType(), calling_convention);
   4600 }
   4601 
   4602 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
   4603     HUnresolvedStaticFieldGet* instruction) {
   4604   FieldAccessCallingConventionX86_64 calling_convention;
   4605   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4606                                           instruction->GetFieldType(),
   4607                                           instruction->GetFieldIndex(),
   4608                                           instruction->GetDexPc(),
   4609                                           calling_convention);
   4610 }
   4611 
   4612 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
   4613     HUnresolvedStaticFieldSet* instruction) {
   4614   FieldAccessCallingConventionX86_64 calling_convention;
   4615   codegen_->CreateUnresolvedFieldLocationSummary(
   4616       instruction, instruction->GetFieldType(), calling_convention);
   4617 }
   4618 
   4619 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
   4620     HUnresolvedStaticFieldSet* instruction) {
   4621   FieldAccessCallingConventionX86_64 calling_convention;
   4622   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4623                                           instruction->GetFieldType(),
   4624                                           instruction->GetFieldIndex(),
   4625                                           instruction->GetDexPc(),
   4626                                           calling_convention);
   4627 }
   4628 
   4629 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
   4630   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   4631   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
   4632       ? Location::RequiresRegister()
   4633       : Location::Any();
   4634   locations->SetInAt(0, loc);
   4635 }
   4636 
   4637 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
   4638   if (CanMoveNullCheckToUser(instruction)) {
   4639     return;
   4640   }
   4641   LocationSummary* locations = instruction->GetLocations();
   4642   Location obj = locations->InAt(0);
   4643 
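           // A 4-byte load from the object's address: if `obj` is null this faults, and the fault
           // handler converts the fault into a NullPointerException at the dex PC recorded below.
           // testl is used because it reads memory without clobbering any general-purpose register.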
   4644   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
   4645   RecordPcInfo(instruction, instruction->GetDexPc());
   4646 }
   4647 
   4648 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
   4649   SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
   4650   AddSlowPath(slow_path);
   4651 
   4652   LocationSummary* locations = instruction->GetLocations();
   4653   Location obj = locations->InAt(0);
   4654 
   4655   if (obj.IsRegister()) {
   4656     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
   4657   } else if (obj.IsStackSlot()) {
   4658     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
   4659   } else {
   4660     DCHECK(obj.IsConstant()) << obj;
   4661     DCHECK(obj.GetConstant()->IsNullConstant());
   4662     __ jmp(slow_path->GetEntryLabel());
   4663     return;
   4664   }
   4665   __ j(kEqual, slow_path->GetEntryLabel());
   4666 }
   4667 
   4668 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
   4669   codegen_->GenerateNullCheck(instruction);
   4670 }
   4671 
   4672 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
   4673   bool object_array_get_with_read_barrier =
   4674       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   4675   LocationSummary* locations =
   4676       new (GetGraph()->GetArena()) LocationSummary(instruction,
   4677                                                    object_array_get_with_read_barrier ?
   4678                                                        LocationSummary::kCallOnSlowPath :
   4679                                                        LocationSummary::kNoCall);
   4680   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
   4681     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   4682   }
   4683   locations->SetInAt(0, Location::RequiresRegister());
   4684   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4685   if (Primitive::IsFloatingPointType(instruction->GetType())) {
   4686     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   4687   } else {
   4688     // The output overlaps for an object array get when read barriers
   4689     // are enabled: we do not want the move to overwrite the array's
   4690     // location, as we need it to emit the read barrier.
   4691     locations->SetOut(
   4692         Location::RequiresRegister(),
   4693         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   4694   }
   4695 }
   4696 
   4697 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
   4698   LocationSummary* locations = instruction->GetLocations();
   4699   Location obj_loc = locations->InAt(0);
   4700   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   4701   Location index = locations->InAt(1);
   4702   Location out_loc = locations->Out();
   4703   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   4704 
   4705   Primitive::Type type = instruction->GetType();
   4706   switch (type) {
   4707     case Primitive::kPrimBoolean: {
   4708       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4709       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
   4710       break;
   4711     }
   4712 
   4713     case Primitive::kPrimByte: {
   4714       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4715       __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
   4716       break;
   4717     }
   4718 
   4719     case Primitive::kPrimShort: {
   4720       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4721       __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
   4722       break;
   4723     }
   4724 
   4725     case Primitive::kPrimChar: {
   4726       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4727       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
    4728         // Branch to the compressed or uncompressed case based on the string's compression flag.
   4729         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
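                 // The compression flag is the least significant bit of the count field (see the
                 // static_assert below); compressed strings store 8-bit characters, uncompressed
                 // strings store 16-bit characters.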
   4730         NearLabel done, not_compressed;
   4731         __ testb(Address(obj, count_offset), Immediate(1));
   4732         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4733         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   4734                       "Expecting 0=compressed, 1=uncompressed");
   4735         __ j(kNotZero, &not_compressed);
   4736         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
   4737         __ jmp(&done);
   4738         __ Bind(&not_compressed);
   4739         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
   4740         __ Bind(&done);
   4741       } else {
   4742         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
   4743       }
   4744       break;
   4745     }
   4746 
   4747     case Primitive::kPrimInt: {
   4748       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4749       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
   4750       break;
   4751     }
   4752 
   4753     case Primitive::kPrimNot: {
   4754       static_assert(
   4755           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   4756           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   4757       // /* HeapReference<Object> */ out =
   4758       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   4759       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   4760         // Note that a potential implicit null check is handled in this
   4761         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
   4762         codegen_->GenerateArrayLoadWithBakerReadBarrier(
   4763             instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
   4764       } else {
   4765         CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4766         __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
   4767         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4768         // If read barriers are enabled, emit read barriers other than
   4769         // Baker's using a slow path (and also unpoison the loaded
   4770         // reference, if heap poisoning is enabled).
   4771         if (index.IsConstant()) {
   4772           uint32_t offset =
   4773               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
   4774           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
   4775         } else {
   4776           codegen_->MaybeGenerateReadBarrierSlow(
   4777               instruction, out_loc, out_loc, obj_loc, data_offset, index);
   4778         }
   4779       }
   4780       break;
   4781     }
   4782 
   4783     case Primitive::kPrimLong: {
   4784       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4785       __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
   4786       break;
   4787     }
   4788 
   4789     case Primitive::kPrimFloat: {
   4790       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
   4791       __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
   4792       break;
   4793     }
   4794 
   4795     case Primitive::kPrimDouble: {
   4796       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
   4797       __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
   4798       break;
   4799     }
   4800 
   4801     case Primitive::kPrimVoid:
   4802       LOG(FATAL) << "Unreachable type " << type;
   4803       UNREACHABLE();
   4804   }
   4805 
   4806   if (type == Primitive::kPrimNot) {
   4807     // Potential implicit null checks, in the case of reference
   4808     // arrays, are handled in the previous switch statement.
   4809   } else {
   4810     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4811   }
   4812 }
   4813 
   4814 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
   4815   Primitive::Type value_type = instruction->GetComponentType();
   4816 
   4817   bool needs_write_barrier =
   4818       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   4819   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   4820 
   4821   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
   4822       instruction,
   4823       may_need_runtime_call_for_type_check ?
   4824           LocationSummary::kCallOnSlowPath :
   4825           LocationSummary::kNoCall);
   4826 
   4827   locations->SetInAt(0, Location::RequiresRegister());
   4828   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4829   if (Primitive::IsFloatingPointType(value_type)) {
   4830     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   4831   } else {
   4832     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   4833   }
   4834 
   4835   if (needs_write_barrier) {
   4836     // Temporary registers for the write barrier.
   4837     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
   4838     locations->AddTemp(Location::RequiresRegister());
   4839   }
   4840 }
   4841 
   4842 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
   4843   LocationSummary* locations = instruction->GetLocations();
   4844   Location array_loc = locations->InAt(0);
   4845   CpuRegister array = array_loc.AsRegister<CpuRegister>();
   4846   Location index = locations->InAt(1);
   4847   Location value = locations->InAt(2);
   4848   Primitive::Type value_type = instruction->GetComponentType();
   4849   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   4850   bool needs_write_barrier =
   4851       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   4852   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   4853   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   4854   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   4855 
   4856   switch (value_type) {
   4857     case Primitive::kPrimBoolean:
   4858     case Primitive::kPrimByte: {
   4859       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
   4860       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
   4861       if (value.IsRegister()) {
   4862         __ movb(address, value.AsRegister<CpuRegister>());
   4863       } else {
   4864         __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
   4865       }
   4866       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4867       break;
   4868     }
   4869 
   4870     case Primitive::kPrimShort:
   4871     case Primitive::kPrimChar: {
   4872       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
   4873       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
   4874       if (value.IsRegister()) {
   4875         __ movw(address, value.AsRegister<CpuRegister>());
   4876       } else {
   4877         DCHECK(value.IsConstant()) << value;
   4878         __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
   4879       }
   4880       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4881       break;
   4882     }
   4883 
   4884     case Primitive::kPrimNot: {
   4885       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
   4886       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
   4887 
   4888       if (!value.IsRegister()) {
   4889         // Just setting null.
   4890         DCHECK(instruction->InputAt(2)->IsNullConstant());
   4891         DCHECK(value.IsConstant()) << value;
   4892         __ movl(address, Immediate(0));
   4893         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4894         DCHECK(!needs_write_barrier);
   4895         DCHECK(!may_need_runtime_call_for_type_check);
   4896         break;
   4897       }
   4898 
   4899       DCHECK(needs_write_barrier);
   4900       CpuRegister register_value = value.AsRegister<CpuRegister>();
   4901       // We cannot use a NearLabel for `done`, as its range may be too
   4902       // short when Baker read barriers are enabled.
   4903       Label done;
   4904       NearLabel not_null, do_put;
   4905       SlowPathCode* slow_path = nullptr;
   4906       Location temp_loc = locations->GetTemp(0);
   4907       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
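               // The type check below verifies that the value's class is assignable to the
               // array's component type: load array->klass_->component_type_ and compare it with
               // value->klass_. If the array is statically an Object[], a mismatch is still fine
               // when the runtime component type is java.lang.Object (its super class is null);
               // otherwise the ArraySet slow path handles the store.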
   4908       if (may_need_runtime_call_for_type_check) {
   4909         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
   4910         codegen_->AddSlowPath(slow_path);
   4911         if (instruction->GetValueCanBeNull()) {
   4912           __ testl(register_value, register_value);
   4913           __ j(kNotEqual, &not_null);
   4914           __ movl(address, Immediate(0));
   4915           codegen_->MaybeRecordImplicitNullCheck(instruction);
   4916           __ jmp(&done);
   4917           __ Bind(&not_null);
   4918         }
   4919 
   4920         // Note that when Baker read barriers are enabled, the type
   4921         // checks are performed without read barriers.  This is fine,
   4922         // even in the case where a class object is in the from-space
   4923         // after the flip, as a comparison involving such a type would
   4924         // not produce a false positive; it may of course produce a
   4925         // false negative, in which case we would take the ArraySet
   4926         // slow path.
   4927 
   4928         // /* HeapReference<Class> */ temp = array->klass_
   4929         __ movl(temp, Address(array, class_offset));
   4930         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4931         __ MaybeUnpoisonHeapReference(temp);
   4932 
   4933         // /* HeapReference<Class> */ temp = temp->component_type_
   4934         __ movl(temp, Address(temp, component_offset));
   4935         // If heap poisoning is enabled, no need to unpoison `temp`
   4936         // nor the object reference in `register_value->klass`, as
   4937         // we are comparing two poisoned references.
   4938         __ cmpl(temp, Address(register_value, class_offset));
   4939 
   4940         if (instruction->StaticTypeOfArrayIsObjectArray()) {
   4941           __ j(kEqual, &do_put);
   4942           // If heap poisoning is enabled, the `temp` reference has
   4943           // not been unpoisoned yet; unpoison it now.
   4944           __ MaybeUnpoisonHeapReference(temp);
   4945 
   4946           // If heap poisoning is enabled, no need to unpoison the
   4947           // heap reference loaded below, as it is only used for a
   4948           // comparison with null.
   4949           __ cmpl(Address(temp, super_offset), Immediate(0));
   4950           __ j(kNotEqual, slow_path->GetEntryLabel());
   4951           __ Bind(&do_put);
   4952         } else {
   4953           __ j(kNotEqual, slow_path->GetEntryLabel());
   4954         }
   4955       }
   4956 
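               // Poison a copy of the reference in `temp` so that `register_value` stays
               // unpoisoned for the card marking below.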
   4957       if (kPoisonHeapReferences) {
   4958         __ movl(temp, register_value);
   4959         __ PoisonHeapReference(temp);
   4960         __ movl(address, temp);
   4961       } else {
   4962         __ movl(address, register_value);
   4963       }
   4964       if (!may_need_runtime_call_for_type_check) {
   4965         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4966       }
   4967 
   4968       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
   4969       codegen_->MarkGCCard(
   4970           temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
   4971       __ Bind(&done);
   4972 
   4973       if (slow_path != nullptr) {
   4974         __ Bind(slow_path->GetExitLabel());
   4975       }
   4976 
   4977       break;
   4978     }
   4979 
   4980     case Primitive::kPrimInt: {
   4981       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
   4982       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
   4983       if (value.IsRegister()) {
   4984         __ movl(address, value.AsRegister<CpuRegister>());
   4985       } else {
   4986         DCHECK(value.IsConstant()) << value;
   4987         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4988         __ movl(address, Immediate(v));
   4989       }
   4990       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4991       break;
   4992     }
   4993 
   4994     case Primitive::kPrimLong: {
   4995       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
   4996       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
   4997       if (value.IsRegister()) {
   4998         __ movq(address, value.AsRegister<CpuRegister>());
   4999         codegen_->MaybeRecordImplicitNullCheck(instruction);
   5000       } else {
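                 // A 64-bit constant cannot always be stored with a single immediate;
                 // MoveInt64ToAddress uses either one sign-extended 32-bit immediate store or
                 // two 32-bit stores (low half, then high half), hence the high-half address.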
   5001         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
   5002         Address address_high =
   5003             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
   5004         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
   5005       }
   5006       break;
   5007     }
   5008 
   5009     case Primitive::kPrimFloat: {
   5010       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
   5011       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
   5012       if (value.IsFpuRegister()) {
   5013         __ movss(address, value.AsFpuRegister<XmmRegister>());
   5014       } else {
   5015         DCHECK(value.IsConstant());
   5016         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
   5017         __ movl(address, Immediate(v));
   5018       }
   5019       codegen_->MaybeRecordImplicitNullCheck(instruction);
   5020       break;
   5021     }
   5022 
   5023     case Primitive::kPrimDouble: {
   5024       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
   5025       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
   5026       if (value.IsFpuRegister()) {
   5027         __ movsd(address, value.AsFpuRegister<XmmRegister>());
   5028         codegen_->MaybeRecordImplicitNullCheck(instruction);
   5029       } else {
   5030         int64_t v =
   5031             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
   5032         Address address_high =
   5033             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
   5034         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
   5035       }
   5036       break;
   5037     }
   5038 
   5039     case Primitive::kPrimVoid:
   5040       LOG(FATAL) << "Unreachable type " << instruction->GetType();
   5041       UNREACHABLE();
   5042   }
   5043 }
   5044 
   5045 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
   5046   LocationSummary* locations =
   5047       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   5048   locations->SetInAt(0, Location::RequiresRegister());
   5049   if (!instruction->IsEmittedAtUseSite()) {
   5050     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5051   }
   5052 }
   5053 
   5054 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
   5055   if (instruction->IsEmittedAtUseSite()) {
   5056     return;
   5057   }
   5058 
   5059   LocationSummary* locations = instruction->GetLocations();
   5060   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   5061   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   5062   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   5063   __ movl(out, Address(obj, offset));
   5064   codegen_->MaybeRecordImplicitNullCheck(instruction);
    5065   // Shift out the compression flag (the count's least significant bit) for String lengths.
   5066   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
   5067     __ shrl(out, Immediate(1));
   5068   }
   5069 }
   5070 
   5071 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
   5072   RegisterSet caller_saves = RegisterSet::Empty();
   5073   InvokeRuntimeCallingConvention calling_convention;
   5074   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   5075   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   5076   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
   5077   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   5078   HInstruction* length = instruction->InputAt(1);
   5079   if (!length->IsEmittedAtUseSite()) {
   5080     locations->SetInAt(1, Location::RegisterOrConstant(length));
   5081   }
   5082 }
   5083 
   5084 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
   5085   LocationSummary* locations = instruction->GetLocations();
   5086   Location index_loc = locations->InAt(0);
   5087   Location length_loc = locations->InAt(1);
   5088   SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
   5089 
   5090   if (length_loc.IsConstant()) {
   5091     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
   5092     if (index_loc.IsConstant()) {
    5093       // BCE will remove the bounds check if we are guaranteed to pass.
   5094       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
   5095       if (index < 0 || index >= length) {
   5096         codegen_->AddSlowPath(slow_path);
   5097         __ jmp(slow_path->GetEntryLabel());
   5098       } else {
    5099         // Some optimization after BCE may have generated this code, and we should
    5100         // not generate a bounds check when the index is known to be in range.
   5101       }
   5102       return;
   5103     }
   5104 
   5105     // We have to reverse the jump condition because the length is the constant.
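             // The unsigned comparison (kAboveEqual) also catches negative indices, which appear
             // as large unsigned values, so a single branch covers both index < 0 and
             // index >= length.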
   5106     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
   5107     __ cmpl(index_reg, Immediate(length));
   5108     codegen_->AddSlowPath(slow_path);
   5109     __ j(kAboveEqual, slow_path->GetEntryLabel());
   5110   } else {
   5111     HInstruction* array_length = instruction->InputAt(1);
   5112     if (array_length->IsEmittedAtUseSite()) {
   5113       // Address the length field in the array.
   5114       DCHECK(array_length->IsArrayLength());
   5115       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
   5116       Location array_loc = array_length->GetLocations()->InAt(0);
   5117       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
   5118       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
   5119         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
   5120         // the string compression flag) with the in-memory length and avoid the temporary.
   5121         CpuRegister length_reg = CpuRegister(TMP);
   5122         __ movl(length_reg, array_len);
   5123         codegen_->MaybeRecordImplicitNullCheck(array_length);
   5124         __ shrl(length_reg, Immediate(1));
   5125         codegen_->GenerateIntCompare(length_reg, index_loc);
   5126       } else {
    5127         // Checking the bound for the general case:
    5128         // an array of chars, or a String's array when the compression feature is off.
   5129         if (index_loc.IsConstant()) {
   5130           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
   5131           __ cmpl(array_len, Immediate(value));
   5132         } else {
   5133           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
   5134         }
   5135         codegen_->MaybeRecordImplicitNullCheck(array_length);
   5136       }
   5137     } else {
   5138       codegen_->GenerateIntCompare(length_loc, index_loc);
   5139     }
   5140     codegen_->AddSlowPath(slow_path);
   5141     __ j(kBelowEqual, slow_path->GetEntryLabel());
   5142   }
   5143 }
   5144 
   5145 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
   5146                                      CpuRegister card,
   5147                                      CpuRegister object,
   5148                                      CpuRegister value,
   5149                                      bool value_can_be_null) {
   5150   NearLabel is_null;
   5151   if (value_can_be_null) {
   5152     __ testl(value, value);
   5153     __ j(kEqual, &is_null);
   5154   }
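           // Mark the card covering `object` as dirty: load the card table's biased base from
           // the Thread, index it by the object address shifted right by kCardShift, and store a
           // single byte. The value stored is the low byte of the biased base, which the runtime
           // arranges to equal the dirty-card value.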
   5155   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
   5156                                         /* no_rip */ true));
   5157   __ movq(temp, object);
   5158   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
   5159   __ movb(Address(temp, card, TIMES_1, 0), card);
   5160   if (value_can_be_null) {
   5161     __ Bind(&is_null);
   5162   }
   5163 }
   5164 
   5165 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   5166   LOG(FATAL) << "Unimplemented";
   5167 }
   5168 
   5169 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
   5170   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
   5171 }
   5172 
   5173 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5174   LocationSummary* locations =
   5175       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
    5176   // In the suspend check slow path, there are usually no caller-save registers at all.
    5177   // If SIMD instructions are present, however, we force spilling all live SIMD
    5178   // registers in full width (since the runtime only saves/restores the lower part).
   5179   locations->SetCustomSlowPathCallerSaves(
   5180       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
   5181 }
   5182 
   5183 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5184   HBasicBlock* block = instruction->GetBlock();
   5185   if (block->GetLoopInformation() != nullptr) {
   5186     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
   5187     // The back edge will generate the suspend check.
   5188     return;
   5189   }
   5190   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
   5191     // The goto will generate the suspend check.
   5192     return;
   5193   }
   5194   GenerateSuspendCheck(instruction, nullptr);
   5195 }
   5196 
   5197 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
   5198                                                           HBasicBlock* successor) {
   5199   SuspendCheckSlowPathX86_64* slow_path =
   5200       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
   5201   if (slow_path == nullptr) {
   5202     slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
   5203     instruction->SetSlowPath(slow_path);
   5204     codegen_->AddSlowPath(slow_path);
   5205     if (successor != nullptr) {
   5206       DCHECK(successor->IsLoopHeader());
   5207       codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
   5208     }
   5209   } else {
   5210     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   5211   }
   5212 
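           // Test the 16-bit thread flags (a GS-relative Thread field); they are non-zero when a
           // suspend or checkpoint request is pending.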
   5213   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
   5214                                   /* no_rip */ true),
   5215                 Immediate(0));
   5216   if (successor == nullptr) {
   5217     __ j(kNotEqual, slow_path->GetEntryLabel());
   5218     __ Bind(slow_path->GetReturnLabel());
   5219   } else {
   5220     __ j(kEqual, codegen_->GetLabelOf(successor));
   5221     __ jmp(slow_path->GetEntryLabel());
   5222   }
   5223 }
   5224 
   5225 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
   5226   return codegen_->GetAssembler();
   5227 }
   5228 
   5229 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
   5230   MoveOperands* move = moves_[index];
   5231   Location source = move->GetSource();
   5232   Location destination = move->GetDestination();
   5233 
   5234   if (source.IsRegister()) {
   5235     if (destination.IsRegister()) {
   5236       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
   5237     } else if (destination.IsStackSlot()) {
   5238       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5239               source.AsRegister<CpuRegister>());
   5240     } else {
   5241       DCHECK(destination.IsDoubleStackSlot());
   5242       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5243               source.AsRegister<CpuRegister>());
   5244     }
   5245   } else if (source.IsStackSlot()) {
   5246     if (destination.IsRegister()) {
   5247       __ movl(destination.AsRegister<CpuRegister>(),
   5248               Address(CpuRegister(RSP), source.GetStackIndex()));
   5249     } else if (destination.IsFpuRegister()) {
   5250       __ movss(destination.AsFpuRegister<XmmRegister>(),
   5251               Address(CpuRegister(RSP), source.GetStackIndex()));
   5252     } else {
   5253       DCHECK(destination.IsStackSlot());
   5254       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   5255       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   5256     }
   5257   } else if (source.IsDoubleStackSlot()) {
   5258     if (destination.IsRegister()) {
   5259       __ movq(destination.AsRegister<CpuRegister>(),
   5260               Address(CpuRegister(RSP), source.GetStackIndex()));
   5261     } else if (destination.IsFpuRegister()) {
   5262       __ movsd(destination.AsFpuRegister<XmmRegister>(),
   5263                Address(CpuRegister(RSP), source.GetStackIndex()));
   5264     } else {
   5265       DCHECK(destination.IsDoubleStackSlot()) << destination;
   5266       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   5267       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   5268     }
   5269   } else if (source.IsSIMDStackSlot()) {
   5270     DCHECK(destination.IsFpuRegister());
   5271     __ movups(destination.AsFpuRegister<XmmRegister>(),
   5272               Address(CpuRegister(RSP), source.GetStackIndex()));
   5273   } else if (source.IsConstant()) {
   5274     HConstant* constant = source.GetConstant();
   5275     if (constant->IsIntConstant() || constant->IsNullConstant()) {
   5276       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
   5277       if (destination.IsRegister()) {
   5278         if (value == 0) {
   5279           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
   5280         } else {
   5281           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
   5282         }
   5283       } else {
   5284         DCHECK(destination.IsStackSlot()) << destination;
   5285         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
   5286       }
   5287     } else if (constant->IsLongConstant()) {
   5288       int64_t value = constant->AsLongConstant()->GetValue();
   5289       if (destination.IsRegister()) {
   5290         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
   5291       } else {
   5292         DCHECK(destination.IsDoubleStackSlot()) << destination;
   5293         codegen_->Store64BitValueToStack(destination, value);
   5294       }
   5295     } else if (constant->IsFloatConstant()) {
   5296       float fp_value = constant->AsFloatConstant()->GetValue();
   5297       if (destination.IsFpuRegister()) {
   5298         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   5299         codegen_->Load32BitValue(dest, fp_value);
   5300       } else {
   5301         DCHECK(destination.IsStackSlot()) << destination;
   5302         Immediate imm(bit_cast<int32_t, float>(fp_value));
   5303         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
   5304       }
   5305     } else {
   5306       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
    5307       double fp_value = constant->AsDoubleConstant()->GetValue();
   5308       int64_t value = bit_cast<int64_t, double>(fp_value);
   5309       if (destination.IsFpuRegister()) {
   5310         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   5311         codegen_->Load64BitValue(dest, fp_value);
   5312       } else {
   5313         DCHECK(destination.IsDoubleStackSlot()) << destination;
   5314         codegen_->Store64BitValueToStack(destination, value);
   5315       }
   5316     }
   5317   } else if (source.IsFpuRegister()) {
   5318     if (destination.IsFpuRegister()) {
   5319       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
   5320     } else if (destination.IsStackSlot()) {
   5321       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5322                source.AsFpuRegister<XmmRegister>());
   5323     } else if (destination.IsDoubleStackSlot()) {
   5324       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5325                source.AsFpuRegister<XmmRegister>());
   5326     } else {
    5327       DCHECK(destination.IsSIMDStackSlot());
   5328       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5329                 source.AsFpuRegister<XmmRegister>());
   5330     }
   5331   }
   5332 }
   5333 
   5334 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
   5335   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5336   __ movl(Address(CpuRegister(RSP), mem), reg);
   5337   __ movl(reg, CpuRegister(TMP));
   5338 }
   5339 
   5340 void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
   5341   ScratchRegisterScope ensure_scratch(
   5342       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
   5343 
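           // If the scratch register had to be spilled, SpillScratch pushed it onto the stack,
           // so the RSP-relative offsets must be adjusted by one word.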
   5344   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
   5345   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
   5346   __ movl(CpuRegister(ensure_scratch.GetRegister()),
   5347           Address(CpuRegister(RSP), mem2 + stack_offset));
   5348   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
   5349   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
   5350           CpuRegister(ensure_scratch.GetRegister()));
   5351 }
   5352 
   5353 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
   5354   __ movq(CpuRegister(TMP), reg1);
   5355   __ movq(reg1, reg2);
   5356   __ movq(reg2, CpuRegister(TMP));
   5357 }
   5358 
   5359 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
   5360   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5361   __ movq(Address(CpuRegister(RSP), mem), reg);
   5362   __ movq(reg, CpuRegister(TMP));
   5363 }
   5364 
   5365 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
   5366   ScratchRegisterScope ensure_scratch(
   5367       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
   5368 
   5369   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
   5370   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
   5371   __ movq(CpuRegister(ensure_scratch.GetRegister()),
   5372           Address(CpuRegister(RSP), mem2 + stack_offset));
   5373   __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
   5374   __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
   5375           CpuRegister(ensure_scratch.GetRegister()));
   5376 }
   5377 
   5378 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
   5379   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5380   __ movss(Address(CpuRegister(RSP), mem), reg);
   5381   __ movd(reg, CpuRegister(TMP));
   5382 }
   5383 
   5384 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
   5385   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5386   __ movsd(Address(CpuRegister(RSP), mem), reg);
   5387   __ movd(reg, CpuRegister(TMP));
   5388 }
   5389 
   5390 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   5391   MoveOperands* move = moves_[index];
   5392   Location source = move->GetSource();
   5393   Location destination = move->GetDestination();
   5394 
   5395   if (source.IsRegister() && destination.IsRegister()) {
   5396     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
   5397   } else if (source.IsRegister() && destination.IsStackSlot()) {
   5398     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   5399   } else if (source.IsStackSlot() && destination.IsRegister()) {
   5400     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   5401   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
   5402     Exchange32(destination.GetStackIndex(), source.GetStackIndex());
   5403   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
   5404     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   5405   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
   5406     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   5407   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
   5408     Exchange64(destination.GetStackIndex(), source.GetStackIndex());
   5409   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
   5410     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
   5411     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
   5412     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
   5413   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
   5414     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   5415   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
   5416     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   5417   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
   5418     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   5419   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
   5420     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   5421   } else {
   5422     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
   5423   }
   5424 }
   5425 
   5426 
   5427 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
   5428   __ pushq(CpuRegister(reg));
   5429 }
   5430 
   5431 
   5432 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
   5433   __ popq(CpuRegister(reg));
   5434 }
   5435 
   5436 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
   5437     SlowPathCode* slow_path, CpuRegister class_reg) {
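           // Classes whose status is below kStatusInitialized are not yet initialized and are
           // sent to the slow path.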
    5438   __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
   5439           Immediate(mirror::Class::kStatusInitialized));
   5440   __ j(kLess, slow_path->GetEntryLabel());
   5441   __ Bind(slow_path->GetExitLabel());
   5442   // No need for memory fence, thanks to the x86-64 memory model.
   5443 }
   5444 
   5445 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
   5446     HLoadClass::LoadKind desired_class_load_kind) {
   5447   switch (desired_class_load_kind) {
   5448     case HLoadClass::LoadKind::kInvalid:
   5449       LOG(FATAL) << "UNREACHABLE";
   5450       UNREACHABLE();
   5451     case HLoadClass::LoadKind::kReferrersClass:
   5452       break;
   5453     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
   5454     case HLoadClass::LoadKind::kBssEntry:
   5455       DCHECK(!Runtime::Current()->UseJitCompilation());
   5456       break;
   5457     case HLoadClass::LoadKind::kJitTableAddress:
   5458       DCHECK(Runtime::Current()->UseJitCompilation());
   5459       break;
   5460     case HLoadClass::LoadKind::kBootImageAddress:
   5461     case HLoadClass::LoadKind::kRuntimeCall:
   5462       break;
   5463   }
   5464   return desired_class_load_kind;
   5465 }
   5466 
   5467 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
   5468   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   5469   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
   5470     // Custom calling convention: RAX serves as both input and output.
   5471     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
   5472         cls,
   5473         Location::RegisterLocation(RAX),
   5474         Location::RegisterLocation(RAX));
   5475     return;
   5476   }
   5477   DCHECK(!cls->NeedsAccessCheck());
   5478 
   5479   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   5480   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
   5481       ? LocationSummary::kCallOnSlowPath
   5482       : LocationSummary::kNoCall;
   5483   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
   5484   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
   5485     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   5486   }
   5487 
   5488   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
   5489     locations->SetInAt(0, Location::RequiresRegister());
   5490   }
   5491   locations->SetOut(Location::RequiresRegister());
   5492   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
   5493     if (!kUseReadBarrier || kUseBakerReadBarrier) {
   5494       // Rely on the type resolution and/or initialization to save everything.
   5495       // Custom calling convention: RAX serves as both input and output.
   5496       RegisterSet caller_saves = RegisterSet::Empty();
   5497       caller_saves.Add(Location::RegisterLocation(RAX));
   5498       locations->SetCustomSlowPathCallerSaves(caller_saves);
   5499     } else {
   5500       // For non-Baker read barrier we have a temp-clobbering call.
   5501     }
   5502   }
   5503 }
   5504 
   5505 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
   5506                                                  dex::TypeIndex dex_index,
   5507                                                  Handle<mirror::Class> handle) {
   5508   jit_class_roots_.Overwrite(
   5509       TypeReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
   5510   // Add a patch entry and return the label.
   5511   jit_class_patches_.emplace_back(dex_file, dex_index.index_);
   5512   PatchInfo<Label>* info = &jit_class_patches_.back();
   5513   return &info->label;
   5514 }
   5515 
    5516 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose underlying objects we know do
    5517 // not move.
   5518 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   5519   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   5520   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
   5521     codegen_->GenerateLoadClassRuntimeCall(cls);
   5522     return;
   5523   }
   5524   DCHECK(!cls->NeedsAccessCheck());
   5525 
   5526   LocationSummary* locations = cls->GetLocations();
   5527   Location out_loc = locations->Out();
   5528   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5529 
   5530   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
   5531       ? kWithoutReadBarrier
   5532       : kCompilerReadBarrierOption;
   5533   bool generate_null_check = false;
   5534   switch (load_kind) {
   5535     case HLoadClass::LoadKind::kReferrersClass: {
   5536       DCHECK(!cls->CanCallRuntime());
   5537       DCHECK(!cls->MustGenerateClinitCheck());
   5538       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
   5539       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
   5540       GenerateGcRootFieldLoad(
   5541           cls,
   5542           out_loc,
   5543           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
   5544           /* fixup_label */ nullptr,
   5545           read_barrier_option);
   5546       break;
   5547     }
   5548     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
   5549       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
   5550       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
   5551       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
   5552       codegen_->RecordBootTypePatch(cls);
   5553       break;
   5554     case HLoadClass::LoadKind::kBootImageAddress: {
   5555       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
   5556       uint32_t address = dchecked_integral_cast<uint32_t>(
   5557           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
   5558       DCHECK_NE(address, 0u);
   5559       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
   5560       break;
   5561     }
   5562     case HLoadClass::LoadKind::kBssEntry: {
   5563       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5564                                           /* no_rip */ false);
   5565       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
   5566       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
   5567       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
   5568       generate_null_check = true;
   5569       break;
   5570     }
   5571     case HLoadClass::LoadKind::kJitTableAddress: {
   5572       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5573                                           /* no_rip */ true);
   5574       Label* fixup_label =
   5575           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
   5576       // /* GcRoot<mirror::Class> */ out = *address
   5577       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
   5578       break;
   5579     }
   5580     default:
   5581       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
   5582       UNREACHABLE();
   5583   }
   5584 
   5585   if (generate_null_check || cls->MustGenerateClinitCheck()) {
   5586     DCHECK(cls->CanCallRuntime());
   5587     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
   5588         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
   5589     codegen_->AddSlowPath(slow_path);
   5590     if (generate_null_check) {
   5591       __ testl(out, out);
   5592       __ j(kEqual, slow_path->GetEntryLabel());
   5593     }
   5594     if (cls->MustGenerateClinitCheck()) {
   5595       GenerateClassInitializationCheck(slow_path, out);
   5596     } else {
   5597       __ Bind(slow_path->GetExitLabel());
   5598     }
   5599   }
   5600 }
   5601 
   5602 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
   5603   LocationSummary* locations =
   5604       new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
   5605   locations->SetInAt(0, Location::RequiresRegister());
   5606   if (check->HasUses()) {
   5607     locations->SetOut(Location::SameAsFirstInput());
   5608   }
   5609 }
   5610 
   5611 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
    5612   // We assume the class is not null.
   5613   SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
   5614       check->GetLoadClass(), check, check->GetDexPc(), true);
   5615   codegen_->AddSlowPath(slow_path);
   5616   GenerateClassInitializationCheck(slow_path,
   5617                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
   5618 }
   5619 
   5620 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
   5621     HLoadString::LoadKind desired_string_load_kind) {
   5622   switch (desired_string_load_kind) {
   5623     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
   5624     case HLoadString::LoadKind::kBssEntry:
   5625       DCHECK(!Runtime::Current()->UseJitCompilation());
   5626       break;
   5627     case HLoadString::LoadKind::kJitTableAddress:
   5628       DCHECK(Runtime::Current()->UseJitCompilation());
   5629       break;
   5630     case HLoadString::LoadKind::kBootImageAddress:
   5631     case HLoadString::LoadKind::kRuntimeCall:
   5632       break;
   5633   }
   5634   return desired_string_load_kind;
   5635 }
   5636 
   5637 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
   5638   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   5639   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   5640   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
   5641     locations->SetOut(Location::RegisterLocation(RAX));
   5642   } else {
   5643     locations->SetOut(Location::RequiresRegister());
   5644     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
   5645       if (!kUseReadBarrier || kUseBakerReadBarrier) {
   5646         // Rely on the pResolveString to save everything.
   5647         // Custom calling convention: RAX serves as both input and output.
   5648         RegisterSet caller_saves = RegisterSet::Empty();
   5649         caller_saves.Add(Location::RegisterLocation(RAX));
   5650         locations->SetCustomSlowPathCallerSaves(caller_saves);
   5651       } else {
   5652         // For non-Baker read barrier we have a temp-clobbering call.
   5653       }
   5654     }
   5655   }
   5656 }
   5657 
   5658 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
   5659                                                   dex::StringIndex dex_index,
   5660                                                   Handle<mirror::String> handle) {
   5661   jit_string_roots_.Overwrite(
   5662       StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
   5663   // Add a patch entry and return the label.
   5664   jit_string_patches_.emplace_back(dex_file, dex_index.index_);
   5665   PatchInfo<Label>* info = &jit_string_patches_.back();
   5666   return &info->label;
   5667 }
   5668 
    5669 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose underlying objects we know do
    5670 // not move.
   5671 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
   5672   LocationSummary* locations = load->GetLocations();
   5673   Location out_loc = locations->Out();
   5674   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5675 
   5676   switch (load->GetLoadKind()) {
   5677     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
   5678       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
   5679       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
   5680       codegen_->RecordBootStringPatch(load);
   5681       return;  // No dex cache slow path.
   5682     }
   5683     case HLoadString::LoadKind::kBootImageAddress: {
   5684       uint32_t address = dchecked_integral_cast<uint32_t>(
   5685           reinterpret_cast<uintptr_t>(load->GetString().Get()));
   5686       DCHECK_NE(address, 0u);
   5687       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
   5688       return;  // No dex cache slow path.
   5689     }
   5690     case HLoadString::LoadKind::kBssEntry: {
   5691       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5692                                           /* no_rip */ false);
   5693       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
    5694       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
   5695       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
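               // A null GC root means the String has not been resolved yet; the slow path
               // resolves it through the runtime and fills in the .bss entry.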
   5696       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
   5697       codegen_->AddSlowPath(slow_path);
   5698       __ testl(out, out);
   5699       __ j(kEqual, slow_path->GetEntryLabel());
   5700       __ Bind(slow_path->GetExitLabel());
   5701       return;
   5702     }
   5703     case HLoadString::LoadKind::kJitTableAddress: {
   5704       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5705                                           /* no_rip */ true);
   5706       Label* fixup_label = codegen_->NewJitRootStringPatch(
   5707           load->GetDexFile(), load->GetStringIndex(), load->GetString());
   5708       // /* GcRoot<mirror::String> */ out = *address
   5709       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
   5710       return;
   5711     }
   5712     default:
   5713       break;
   5714   }
   5715 
    5716   // TODO: Re-add the compiler code to do the string dex cache lookup.
   5717   // Custom calling convention: RAX serves as both input and output.
   5718   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
   5719   codegen_->InvokeRuntime(kQuickResolveString,
   5720                           load,
   5721                           load->GetDexPc());
   5722   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
   5723 }
   5724 
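         // The pending exception is stored in a field of the Thread object, which is reached
         // through the GS segment register on x86-64.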
   5725 static Address GetExceptionTlsAddress() {
   5726   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
   5727                            /* no_rip */ true);
   5728 }
   5729 
   5730 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
   5731   LocationSummary* locations =
   5732       new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
   5733   locations->SetOut(Location::RequiresRegister());
   5734 }
   5735 
   5736 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
   5737   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
   5738 }
   5739 
   5740 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
   5741   new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
   5742 }
   5743 
   5744 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
   5745   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
   5746 }
   5747 
   5748 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
   5749   LocationSummary* locations =
   5750       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   5751   InvokeRuntimeCallingConvention calling_convention;
   5752   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   5753 }
   5754 
   5755 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
   5756   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
   5757   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
   5758 }
   5759 
   5760 static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   5761   if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
   5762     // We need a temporary for holding the iftable length.
   5763     return true;
   5764   }
   5765   return kEmitCompilerReadBarrier &&
   5766       !kUseBakerReadBarrier &&
   5767       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   5768        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   5769        type_check_kind == TypeCheckKind::kArrayObjectCheck);
   5770 }
   5771 
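         // With non-Baker read barriers, walking the class hierarchy overwrites the register
         // holding the current reference, so a temporary is needed to preserve it for the read
         // barrier slow path.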
   5772 static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   5773   return kEmitCompilerReadBarrier &&
   5774       !kUseBakerReadBarrier &&
   5775       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   5776        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   5777        type_check_kind == TypeCheckKind::kArrayObjectCheck);
   5778 }
   5779 
   5780 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   5781   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   5782   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   5783   bool baker_read_barrier_slow_path = false;
   5784   switch (type_check_kind) {
   5785     case TypeCheckKind::kExactCheck:
   5786     case TypeCheckKind::kAbstractClassCheck:
   5787     case TypeCheckKind::kClassHierarchyCheck:
   5788     case TypeCheckKind::kArrayObjectCheck:
   5789       call_kind =
   5790           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
   5791       baker_read_barrier_slow_path = kUseBakerReadBarrier;
   5792       break;
   5793     case TypeCheckKind::kArrayCheck:
   5794     case TypeCheckKind::kUnresolvedCheck:
   5795     case TypeCheckKind::kInterfaceCheck:
   5796       call_kind = LocationSummary::kCallOnSlowPath;
   5797       break;
   5798   }
   5799 
   5800   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   5801   if (baker_read_barrier_slow_path) {
   5802     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   5803   }
   5804   locations->SetInAt(0, Location::RequiresRegister());
   5805   locations->SetInAt(1, Location::Any());
   5806   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
   5807   locations->SetOut(Location::RequiresRegister());
   5808   // When read barriers are enabled, we need a temporary register for
   5809   // some cases.
   5810   if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
   5811     locations->AddTemp(Location::RequiresRegister());
   5812   }
   5813 }
   5814 
   5815 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   5816   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   5817   LocationSummary* locations = instruction->GetLocations();
   5818   Location obj_loc = locations->InAt(0);
   5819   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   5820   Location cls = locations->InAt(1);
    5821   Location out_loc = locations->Out();
   5822   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5823   Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
   5824       locations->GetTemp(0) :
   5825       Location::NoLocation();
   5826   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   5827   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   5828   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   5829   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   5830   SlowPathCode* slow_path = nullptr;
   5831   NearLabel done, zero;
   5832 
   5833   // Return 0 if `obj` is null.
   5834   // Avoid null check if we know obj is not null.
   5835   if (instruction->MustDoNullCheck()) {
   5836     __ testl(obj, obj);
   5837     __ j(kEqual, &zero);
   5838   }
   5839 
   5840   switch (type_check_kind) {
   5841     case TypeCheckKind::kExactCheck: {
   5842       // /* HeapReference<Class> */ out = obj->klass_
   5843       GenerateReferenceLoadTwoRegisters(instruction,
   5844                                         out_loc,
   5845                                         obj_loc,
   5846                                         class_offset,
   5847                                         kCompilerReadBarrierOption);
   5848       if (cls.IsRegister()) {
   5849         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5850       } else {
   5851         DCHECK(cls.IsStackSlot()) << cls;
   5852         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5853       }
   5854       if (zero.IsLinked()) {
   5855         // Classes must be equal for the instanceof to succeed.
   5856         __ j(kNotEqual, &zero);
   5857         __ movl(out, Immediate(1));
   5858         __ jmp(&done);
   5859       } else {
   5860         __ setcc(kEqual, out);
   5861         // setcc only sets the low byte.
   5862         __ andl(out, Immediate(1));
   5863       }
   5864       break;
   5865     }
   5866 
   5867     case TypeCheckKind::kAbstractClassCheck: {
   5868       // /* HeapReference<Class> */ out = obj->klass_
   5869       GenerateReferenceLoadTwoRegisters(instruction,
   5870                                         out_loc,
   5871                                         obj_loc,
   5872                                         class_offset,
   5873                                         kCompilerReadBarrierOption);
   5874       // If the class is abstract, we eagerly fetch the super class of the
   5875       // object to avoid doing a comparison we know will fail.
   5876       NearLabel loop, success;
   5877       __ Bind(&loop);
   5878       // /* HeapReference<Class> */ out = out->super_class_
   5879       GenerateReferenceLoadOneRegister(instruction,
   5880                                        out_loc,
   5881                                        super_offset,
   5882                                        maybe_temp_loc,
   5883                                        kCompilerReadBarrierOption);
   5884       __ testl(out, out);
   5885       // If `out` is null, we use it for the result, and jump to `done`.
   5886       __ j(kEqual, &done);
   5887       if (cls.IsRegister()) {
   5888         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5889       } else {
   5890         DCHECK(cls.IsStackSlot()) << cls;
   5891         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5892       }
   5893       __ j(kNotEqual, &loop);
   5894       __ movl(out, Immediate(1));
   5895       if (zero.IsLinked()) {
   5896         __ jmp(&done);
   5897       }
   5898       break;
   5899     }
   5900 
   5901     case TypeCheckKind::kClassHierarchyCheck: {
   5902       // /* HeapReference<Class> */ out = obj->klass_
   5903       GenerateReferenceLoadTwoRegisters(instruction,
   5904                                         out_loc,
   5905                                         obj_loc,
   5906                                         class_offset,
   5907                                         kCompilerReadBarrierOption);
   5908       // Walk over the class hierarchy to find a match.
   5909       NearLabel loop, success;
   5910       __ Bind(&loop);
   5911       if (cls.IsRegister()) {
   5912         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5913       } else {
   5914         DCHECK(cls.IsStackSlot()) << cls;
   5915         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5916       }
   5917       __ j(kEqual, &success);
   5918       // /* HeapReference<Class> */ out = out->super_class_
   5919       GenerateReferenceLoadOneRegister(instruction,
   5920                                        out_loc,
   5921                                        super_offset,
   5922                                        maybe_temp_loc,
   5923                                        kCompilerReadBarrierOption);
   5924       __ testl(out, out);
   5925       __ j(kNotEqual, &loop);
   5926       // If `out` is null, we use it for the result, and jump to `done`.
   5927       __ jmp(&done);
   5928       __ Bind(&success);
   5929       __ movl(out, Immediate(1));
   5930       if (zero.IsLinked()) {
   5931         __ jmp(&done);
   5932       }
   5933       break;
   5934     }
   5935 
   5936     case TypeCheckKind::kArrayObjectCheck: {
   5937       // /* HeapReference<Class> */ out = obj->klass_
   5938       GenerateReferenceLoadTwoRegisters(instruction,
   5939                                         out_loc,
   5940                                         obj_loc,
   5941                                         class_offset,
   5942                                         kCompilerReadBarrierOption);
   5943       // Do an exact check.
   5944       NearLabel exact_check;
   5945       if (cls.IsRegister()) {
   5946         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5947       } else {
   5948         DCHECK(cls.IsStackSlot()) << cls;
   5949         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5950       }
   5951       __ j(kEqual, &exact_check);
   5952       // Otherwise, we need to check that the object's class is a non-primitive array.
   5953       // /* HeapReference<Class> */ out = out->component_type_
   5954       GenerateReferenceLoadOneRegister(instruction,
   5955                                        out_loc,
   5956                                        component_offset,
   5957                                        maybe_temp_loc,
   5958                                        kCompilerReadBarrierOption);
   5959       __ testl(out, out);
   5960       // If `out` is null, we use it for the result, and jump to `done`.
   5961       __ j(kEqual, &done);
   5962       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
   5963       __ j(kNotEqual, &zero);
   5964       __ Bind(&exact_check);
   5965       __ movl(out, Immediate(1));
   5966       __ jmp(&done);
   5967       break;
   5968     }
   5969 
   5970     case TypeCheckKind::kArrayCheck: {
   5971       // No read barrier since the slow path will retry upon failure.
   5972       // /* HeapReference<Class> */ out = obj->klass_
   5973       GenerateReferenceLoadTwoRegisters(instruction,
   5974                                         out_loc,
   5975                                         obj_loc,
   5976                                         class_offset,
   5977                                         kWithoutReadBarrier);
   5978       if (cls.IsRegister()) {
   5979         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5980       } else {
   5981         DCHECK(cls.IsStackSlot()) << cls;
   5982         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5983       }
   5984       DCHECK(locations->OnlyCallsOnSlowPath());
   5985       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
   5986                                                                        /* is_fatal */ false);
   5987       codegen_->AddSlowPath(slow_path);
   5988       __ j(kNotEqual, slow_path->GetEntryLabel());
   5989       __ movl(out, Immediate(1));
   5990       if (zero.IsLinked()) {
   5991         __ jmp(&done);
   5992       }
   5993       break;
   5994     }
   5995 
   5996     case TypeCheckKind::kUnresolvedCheck:
   5997     case TypeCheckKind::kInterfaceCheck: {
   5998       // Note that we indeed only call on slow path, but we always go
   5999       // into the slow path for the unresolved and interface check
   6000       // cases.
   6001       //
   6002       // We cannot directly call the InstanceofNonTrivial runtime
   6003       // entry point without resorting to a type checking slow path
   6004       // here (i.e. by calling InvokeRuntime directly), as it would
    6005       // require assigning fixed registers for the inputs of this
   6006       // HInstanceOf instruction (following the runtime calling
   6007       // convention), which might be cluttered by the potential first
   6008       // read barrier emission at the beginning of this method.
   6009       //
   6010       // TODO: Introduce a new runtime entry point taking the object
   6011       // to test (instead of its class) as argument, and let it deal
   6012       // with the read barrier issues. This will let us refactor this
   6013       // case of the `switch` code as it was previously (with a direct
   6014       // call to the runtime not using a type checking slow path).
   6015       // This should also be beneficial for the other cases above.
   6016       DCHECK(locations->OnlyCallsOnSlowPath());
   6017       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
   6018                                                                        /* is_fatal */ false);
   6019       codegen_->AddSlowPath(slow_path);
   6020       __ jmp(slow_path->GetEntryLabel());
   6021       if (zero.IsLinked()) {
   6022         __ jmp(&done);
   6023       }
   6024       break;
   6025     }
   6026   }
   6027 
   6028   if (zero.IsLinked()) {
   6029     __ Bind(&zero);
   6030     __ xorl(out, out);
   6031   }
   6032 
   6033   if (done.IsLinked()) {
   6034     __ Bind(&done);
   6035   }
   6036 
   6037   if (slow_path != nullptr) {
   6038     __ Bind(slow_path->GetExitLabel());
   6039   }
   6040 }
   6041 
   6042 static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
   6043   switch (type_check_kind) {
   6044     case TypeCheckKind::kExactCheck:
   6045     case TypeCheckKind::kAbstractClassCheck:
   6046     case TypeCheckKind::kClassHierarchyCheck:
   6047     case TypeCheckKind::kArrayObjectCheck:
   6048       return !throws_into_catch && !kEmitCompilerReadBarrier;
   6049     case TypeCheckKind::kInterfaceCheck:
   6050       return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
   6051     case TypeCheckKind::kArrayCheck:
   6052     case TypeCheckKind::kUnresolvedCheck:
   6053       return false;
   6054   }
   6055   LOG(FATAL) << "Unreachable";
   6056   UNREACHABLE();
   6057 }
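         // A "fatal" type check slow path ends by throwing and never returns to compiled code,
         // which is why VisitCheckCast below can register it as LocationSummary::kNoCall; any
         // case that may need to resume after the runtime call uses kCallOnSlowPath instead.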
   6058 
   6059 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
   6060   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
   6061   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   6062   bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch);
   6063   LocationSummary::CallKind call_kind = is_fatal_slow_path
   6064                                             ? LocationSummary::kNoCall
   6065                                             : LocationSummary::kCallOnSlowPath;
   6066   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   6067   locations->SetInAt(0, Location::RequiresRegister());
   6068   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
   6069     // Require a register for the interface check since there is a loop that compares the class to
   6070     // a memory address.
   6071     locations->SetInAt(1, Location::RequiresRegister());
   6072   } else {
   6073     locations->SetInAt(1, Location::Any());
   6074   }
   6075 
   6076   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
   6077   locations->AddTemp(Location::RequiresRegister());
   6078   // When read barriers are enabled, we need an additional temporary
   6079   // register for some cases.
   6080   if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
   6081     locations->AddTemp(Location::RequiresRegister());
   6082   }
   6083 }
   6084 
   6085 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
   6086   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   6087   LocationSummary* locations = instruction->GetLocations();
   6088   Location obj_loc = locations->InAt(0);
   6089   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   6090   Location cls = locations->InAt(1);
   6091   Location temp_loc = locations->GetTemp(0);
   6092   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
   6093   Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
   6094       locations->GetTemp(1) :
   6095       Location::NoLocation();
   6096   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   6097   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   6098   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   6099   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   6100   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
   6101   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
   6102   const uint32_t object_array_data_offset =
   6103       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
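           // Note: heap references are compressed 32-bit values (see kHeapReferenceSize), which
           // is why the reference loads below use movl and array indexing uses a TIMES_4 scale.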
   6104 
    6105   // Never fatal when read barriers are enabled: the checks below avoid read barriers
    6106   // for performance and code size reasons, which may produce false negatives, and those
    6107   // non-fatal cases must be able to fall back to the runtime entrypoint.
   6108   bool is_type_check_slow_path_fatal =
   6109       IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
   6110   SlowPathCode* type_check_slow_path =
   6111       new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
   6112                                                            is_type_check_slow_path_fatal);
   6113   codegen_->AddSlowPath(type_check_slow_path);
   6114 
   6115 
   6116   NearLabel done;
   6117   // Avoid null check if we know obj is not null.
   6118   if (instruction->MustDoNullCheck()) {
   6119     __ testl(obj, obj);
   6120     __ j(kEqual, &done);
   6121   }
   6122 
   6123   switch (type_check_kind) {
   6124     case TypeCheckKind::kExactCheck:
   6125     case TypeCheckKind::kArrayCheck: {
   6126       // /* HeapReference<Class> */ temp = obj->klass_
   6127       GenerateReferenceLoadTwoRegisters(instruction,
   6128                                         temp_loc,
   6129                                         obj_loc,
   6130                                         class_offset,
   6131                                         kWithoutReadBarrier);
   6132       if (cls.IsRegister()) {
   6133         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   6134       } else {
   6135         DCHECK(cls.IsStackSlot()) << cls;
   6136         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6137       }
   6138       // Jump to slow path for throwing the exception or doing a
   6139       // more involved array check.
   6140       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
   6141       break;
   6142     }
   6143 
   6144     case TypeCheckKind::kAbstractClassCheck: {
   6145       // /* HeapReference<Class> */ temp = obj->klass_
   6146       GenerateReferenceLoadTwoRegisters(instruction,
   6147                                         temp_loc,
   6148                                         obj_loc,
   6149                                         class_offset,
   6150                                         kWithoutReadBarrier);
   6151       // If the class is abstract, we eagerly fetch the super class of the
   6152       // object to avoid doing a comparison we know will fail.
   6153       NearLabel loop;
   6154       __ Bind(&loop);
   6155       // /* HeapReference<Class> */ temp = temp->super_class_
   6156       GenerateReferenceLoadOneRegister(instruction,
   6157                                        temp_loc,
   6158                                        super_offset,
   6159                                        maybe_temp2_loc,
   6160                                        kWithoutReadBarrier);
   6161 
   6162       // If the class reference currently in `temp` is null, jump to the slow path to throw the
   6163       // exception.
   6164       __ testl(temp, temp);
   6165       // Otherwise, compare the classes.
   6166       __ j(kZero, type_check_slow_path->GetEntryLabel());
   6167       if (cls.IsRegister()) {
   6168         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   6169       } else {
   6170         DCHECK(cls.IsStackSlot()) << cls;
   6171         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6172       }
   6173       __ j(kNotEqual, &loop);
   6174       break;
   6175     }
   6176 
   6177     case TypeCheckKind::kClassHierarchyCheck: {
   6178       // /* HeapReference<Class> */ temp = obj->klass_
   6179       GenerateReferenceLoadTwoRegisters(instruction,
   6180                                         temp_loc,
   6181                                         obj_loc,
   6182                                         class_offset,
   6183                                         kWithoutReadBarrier);
   6184       // Walk over the class hierarchy to find a match.
   6185       NearLabel loop;
   6186       __ Bind(&loop);
   6187       if (cls.IsRegister()) {
   6188         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   6189       } else {
   6190         DCHECK(cls.IsStackSlot()) << cls;
   6191         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6192       }
   6193       __ j(kEqual, &done);
   6194 
   6195       // /* HeapReference<Class> */ temp = temp->super_class_
   6196       GenerateReferenceLoadOneRegister(instruction,
   6197                                        temp_loc,
   6198                                        super_offset,
   6199                                        maybe_temp2_loc,
   6200                                        kWithoutReadBarrier);
   6201 
   6202       // If the class reference currently in `temp` is not null, jump
    6203       // back to the beginning of the loop.
   6204       __ testl(temp, temp);
   6205       __ j(kNotZero, &loop);
   6206       // Otherwise, jump to the slow path to throw the exception.
   6207       __ jmp(type_check_slow_path->GetEntryLabel());
   6208       break;
   6209     }
   6210 
   6211     case TypeCheckKind::kArrayObjectCheck: {
   6212       // /* HeapReference<Class> */ temp = obj->klass_
   6213       GenerateReferenceLoadTwoRegisters(instruction,
   6214                                         temp_loc,
   6215                                         obj_loc,
   6216                                         class_offset,
   6217                                         kWithoutReadBarrier);
   6218       // Do an exact check.
   6219       NearLabel check_non_primitive_component_type;
   6220       if (cls.IsRegister()) {
   6221         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   6222       } else {
   6223         DCHECK(cls.IsStackSlot()) << cls;
   6224         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6225       }
   6226       __ j(kEqual, &done);
   6227 
   6228       // Otherwise, we need to check that the object's class is a non-primitive array.
   6229       // /* HeapReference<Class> */ temp = temp->component_type_
   6230       GenerateReferenceLoadOneRegister(instruction,
   6231                                        temp_loc,
   6232                                        component_offset,
   6233                                        maybe_temp2_loc,
   6234                                        kWithoutReadBarrier);
   6235 
   6236       // If the component type is not null (i.e. the object is indeed
    6237       // an array), fall through to further check that this component
    6238       // type is not a primitive
   6239       // type.
   6240       __ testl(temp, temp);
   6241       // Otherwise, jump to the slow path to throw the exception.
   6242       __ j(kZero, type_check_slow_path->GetEntryLabel());
   6243       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
   6244       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
   6245       break;
   6246     }
   6247 
   6248     case TypeCheckKind::kUnresolvedCheck: {
   6249       // We always go into the type check slow path for the unresolved case.
   6250       //
   6251       // We cannot directly call the CheckCast runtime entry point
   6252       // without resorting to a type checking slow path here (i.e. by
    6253       // calling InvokeRuntime directly), as it would require
    6254       // assigning fixed registers for the inputs of this HCheckCast
    6255       // instruction (following the runtime calling convention), which
   6256       // might be cluttered by the potential first read barrier
   6257       // emission at the beginning of this method.
   6258       __ jmp(type_check_slow_path->GetEntryLabel());
   6259       break;
   6260     }
   6261 
   6262     case TypeCheckKind::kInterfaceCheck:
   6263       // Fast path for the interface check. We always go slow path for heap poisoning since
   6264       // unpoisoning cls would require an extra temp.
   6265       if (!kPoisonHeapReferences) {
    6266         // Try to avoid read barriers to improve the fast path. We cannot get false positives by
   6267         // doing this.
   6268         // /* HeapReference<Class> */ temp = obj->klass_
   6269         GenerateReferenceLoadTwoRegisters(instruction,
   6270                                           temp_loc,
   6271                                           obj_loc,
   6272                                           class_offset,
   6273                                           kWithoutReadBarrier);
   6274 
   6275         // /* HeapReference<Class> */ temp = temp->iftable_
   6276         GenerateReferenceLoadTwoRegisters(instruction,
   6277                                           temp_loc,
   6278                                           temp_loc,
   6279                                           iftable_offset,
   6280                                           kWithoutReadBarrier);
   6281         // Iftable is never null.
   6282         __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
   6283         // Loop through the iftable and check if any class matches.
   6284         NearLabel start_loop;
   6285         __ Bind(&start_loop);
   6286         // Need to subtract first to handle the empty array case.
   6287         __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
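                 // In ART's IfTable, each entry spans two references (the interface class
                 // followed by its method array), hence the stride of 2; the even slots holding
                 // the interface classes are what get compared against `cls` below.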
   6288         __ j(kNegative, type_check_slow_path->GetEntryLabel());
   6289         // Go to next interface if the classes do not match.
   6290         __ cmpl(cls.AsRegister<CpuRegister>(),
   6291                 CodeGeneratorX86_64::ArrayAddress(temp,
   6292                                                   maybe_temp2_loc,
   6293                                                   TIMES_4,
   6294                                                   object_array_data_offset));
    6295         __ j(kNotEqual, &start_loop);  // Fall through (success) if the classes match.
   6296       } else {
   6297         __ jmp(type_check_slow_path->GetEntryLabel());
   6298       }
   6299       break;
   6300   }
   6301 
   6302   if (done.IsLinked()) {
   6303     __ Bind(&done);
   6304   }
   6305 
   6306   __ Bind(type_check_slow_path->GetExitLabel());
   6307 }
   6308 
   6309 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   6310   LocationSummary* locations =
   6311       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   6312   InvokeRuntimeCallingConvention calling_convention;
   6313   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   6314 }
   6315 
   6316 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   6317   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
   6318                           instruction,
   6319                           instruction->GetDexPc());
   6320   if (instruction->IsEnter()) {
   6321     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
   6322   } else {
   6323     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   6324   }
   6325 }
   6326 
   6327 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
   6328 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
   6329 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
   6330 
   6331 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
   6332   LocationSummary* locations =
   6333       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   6334   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
   6335          || instruction->GetResultType() == Primitive::kPrimLong);
   6336   locations->SetInAt(0, Location::RequiresRegister());
   6337   locations->SetInAt(1, Location::Any());
   6338   locations->SetOut(Location::SameAsFirstInput());
   6339 }
   6340 
   6341 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
   6342   HandleBitwiseOperation(instruction);
   6343 }
   6344 
   6345 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
   6346   HandleBitwiseOperation(instruction);
   6347 }
   6348 
   6349 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
   6350   HandleBitwiseOperation(instruction);
   6351 }
   6352 
   6353 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
   6354   LocationSummary* locations = instruction->GetLocations();
   6355   Location first = locations->InAt(0);
   6356   Location second = locations->InAt(1);
   6357   DCHECK(first.Equals(locations->Out()));
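           // Note: x86-64 integer ALU instructions are two-operand (dst = dst op src), which is
           // why the locations builder above constrains the output to SameAsFirstInput.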
   6358 
   6359   if (instruction->GetResultType() == Primitive::kPrimInt) {
   6360     if (second.IsRegister()) {
   6361       if (instruction->IsAnd()) {
   6362         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6363       } else if (instruction->IsOr()) {
   6364         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6365       } else {
   6366         DCHECK(instruction->IsXor());
   6367         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6368       }
   6369     } else if (second.IsConstant()) {
   6370       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
   6371       if (instruction->IsAnd()) {
   6372         __ andl(first.AsRegister<CpuRegister>(), imm);
   6373       } else if (instruction->IsOr()) {
   6374         __ orl(first.AsRegister<CpuRegister>(), imm);
   6375       } else {
   6376         DCHECK(instruction->IsXor());
   6377         __ xorl(first.AsRegister<CpuRegister>(), imm);
   6378       }
   6379     } else {
   6380       Address address(CpuRegister(RSP), second.GetStackIndex());
   6381       if (instruction->IsAnd()) {
   6382         __ andl(first.AsRegister<CpuRegister>(), address);
   6383       } else if (instruction->IsOr()) {
   6384         __ orl(first.AsRegister<CpuRegister>(), address);
   6385       } else {
   6386         DCHECK(instruction->IsXor());
   6387         __ xorl(first.AsRegister<CpuRegister>(), address);
   6388       }
   6389     }
   6390   } else {
   6391     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
   6392     CpuRegister first_reg = first.AsRegister<CpuRegister>();
   6393     bool second_is_constant = false;
   6394     int64_t value = 0;
   6395     if (second.IsConstant()) {
   6396       second_is_constant = true;
   6397       value = second.GetConstant()->AsLongConstant()->GetValue();
   6398     }
   6399     bool is_int32_value = IsInt<32>(value);
   6400 
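             // Note: andq/orq/xorq immediates are sign-extended 32-bit values, so constants that
             // do not fit are materialized in the RIP-addressed constant area instead (see the
             // LiteralInt64Address cases below).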
   6401     if (instruction->IsAnd()) {
   6402       if (second_is_constant) {
   6403         if (is_int32_value) {
   6404           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
   6405         } else {
   6406           __ andq(first_reg, codegen_->LiteralInt64Address(value));
   6407         }
   6408       } else if (second.IsDoubleStackSlot()) {
   6409         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6410       } else {
   6411         __ andq(first_reg, second.AsRegister<CpuRegister>());
   6412       }
   6413     } else if (instruction->IsOr()) {
   6414       if (second_is_constant) {
   6415         if (is_int32_value) {
   6416           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
   6417         } else {
   6418           __ orq(first_reg, codegen_->LiteralInt64Address(value));
   6419         }
   6420       } else if (second.IsDoubleStackSlot()) {
   6421         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6422       } else {
   6423         __ orq(first_reg, second.AsRegister<CpuRegister>());
   6424       }
   6425     } else {
   6426       DCHECK(instruction->IsXor());
   6427       if (second_is_constant) {
   6428         if (is_int32_value) {
   6429           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
   6430         } else {
   6431           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
   6432         }
   6433       } else if (second.IsDoubleStackSlot()) {
   6434         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6435       } else {
   6436         __ xorq(first_reg, second.AsRegister<CpuRegister>());
   6437       }
   6438     }
   6439   }
   6440 }
   6441 
   6442 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
   6443     HInstruction* instruction,
   6444     Location out,
   6445     uint32_t offset,
   6446     Location maybe_temp,
   6447     ReadBarrierOption read_barrier_option) {
   6448   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   6449   if (read_barrier_option == kWithReadBarrier) {
   6450     CHECK(kEmitCompilerReadBarrier);
   6451     if (kUseBakerReadBarrier) {
   6452       // Load with fast path based Baker's read barrier.
   6453       // /* HeapReference<Object> */ out = *(out + offset)
   6454       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   6455           instruction, out, out_reg, offset, /* needs_null_check */ false);
   6456     } else {
   6457       // Load with slow path based read barrier.
   6458       // Save the value of `out` into `maybe_temp` before overwriting it
   6459       // in the following move operation, as we will need it for the
   6460       // read barrier below.
   6461       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
   6462       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
   6463       // /* HeapReference<Object> */ out = *(out + offset)
   6464       __ movl(out_reg, Address(out_reg, offset));
   6465       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
   6466     }
   6467   } else {
   6468     // Plain load with no read barrier.
   6469     // /* HeapReference<Object> */ out = *(out + offset)
   6470     __ movl(out_reg, Address(out_reg, offset));
   6471     __ MaybeUnpoisonHeapReference(out_reg);
   6472   }
   6473 }
   6474 
   6475 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
   6476     HInstruction* instruction,
   6477     Location out,
   6478     Location obj,
   6479     uint32_t offset,
   6480     ReadBarrierOption read_barrier_option) {
   6481   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   6482   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
   6483   if (read_barrier_option == kWithReadBarrier) {
   6484     CHECK(kEmitCompilerReadBarrier);
   6485     if (kUseBakerReadBarrier) {
   6486       // Load with fast path based Baker's read barrier.
   6487       // /* HeapReference<Object> */ out = *(obj + offset)
   6488       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   6489           instruction, out, obj_reg, offset, /* needs_null_check */ false);
   6490     } else {
   6491       // Load with slow path based read barrier.
   6492       // /* HeapReference<Object> */ out = *(obj + offset)
   6493       __ movl(out_reg, Address(obj_reg, offset));
   6494       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
   6495     }
   6496   } else {
   6497     // Plain load with no read barrier.
   6498     // /* HeapReference<Object> */ out = *(obj + offset)
   6499     __ movl(out_reg, Address(obj_reg, offset));
   6500     __ MaybeUnpoisonHeapReference(out_reg);
   6501   }
   6502 }
   6503 
   6504 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
   6505     HInstruction* instruction,
   6506     Location root,
   6507     const Address& address,
   6508     Label* fixup_label,
   6509     ReadBarrierOption read_barrier_option) {
   6510   CpuRegister root_reg = root.AsRegister<CpuRegister>();
   6511   if (read_barrier_option == kWithReadBarrier) {
   6512     DCHECK(kEmitCompilerReadBarrier);
   6513     if (kUseBakerReadBarrier) {
   6514       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
    6515       // Baker's read barriers are used:
   6516       //
   6517       //   root = obj.field;
   6518       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
   6519       //   if (temp != null) {
   6520       //     root = temp(root)
   6521       //   }
   6522 
   6523       // /* GcRoot<mirror::Object> */ root = *address
   6524       __ movl(root_reg, address);
   6525       if (fixup_label != nullptr) {
   6526         __ Bind(fixup_label);
   6527       }
   6528       static_assert(
   6529           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
   6530           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
   6531           "have different sizes.");
   6532       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
   6533                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
   6534                     "have different sizes.");
   6535 
   6536       // Slow path marking the GC root `root`.
   6537       SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
   6538           instruction, root, /* unpoison_ref_before_marking */ false);
   6539       codegen_->AddSlowPath(slow_path);
   6540 
   6541       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
   6542       const int32_t entry_point_offset =
   6543           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
   6544       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
   6545       // The entrypoint is null when the GC is not marking.
   6546       __ j(kNotEqual, slow_path->GetEntryLabel());
   6547       __ Bind(slow_path->GetExitLabel());
   6548     } else {
   6549       // GC root loaded through a slow path for read barriers other
   6550       // than Baker's.
   6551       // /* GcRoot<mirror::Object>* */ root = address
   6552       __ leaq(root_reg, address);
   6553       if (fixup_label != nullptr) {
   6554         __ Bind(fixup_label);
   6555       }
   6556       // /* mirror::Object* */ root = root->Read()
   6557       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
   6558     }
   6559   } else {
   6560     // Plain GC root load with no read barrier.
   6561     // /* GcRoot<mirror::Object> */ root = *address
   6562     __ movl(root_reg, address);
   6563     if (fixup_label != nullptr) {
   6564       __ Bind(fixup_label);
   6565     }
   6566     // Note that GC roots are not affected by heap poisoning, thus we
   6567     // do not have to unpoison `root_reg` here.
   6568   }
   6569 }
   6570 
   6571 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
   6572                                                                 Location ref,
   6573                                                                 CpuRegister obj,
   6574                                                                 uint32_t offset,
   6575                                                                 bool needs_null_check) {
   6576   DCHECK(kEmitCompilerReadBarrier);
   6577   DCHECK(kUseBakerReadBarrier);
   6578 
   6579   // /* HeapReference<Object> */ ref = *(obj + offset)
   6580   Address src(obj, offset);
   6581   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
   6582 }
   6583 
   6584 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
   6585                                                                 Location ref,
   6586                                                                 CpuRegister obj,
   6587                                                                 uint32_t data_offset,
   6588                                                                 Location index,
   6589                                                                 bool needs_null_check) {
   6590   DCHECK(kEmitCompilerReadBarrier);
   6591   DCHECK(kUseBakerReadBarrier);
   6592 
   6593   static_assert(
   6594       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   6595       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   6596   // /* HeapReference<Object> */ ref =
   6597   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   6598   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
   6599   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
   6600 }
   6601 
   6602 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
   6603                                                                     Location ref,
   6604                                                                     CpuRegister obj,
   6605                                                                     const Address& src,
   6606                                                                     bool needs_null_check,
   6607                                                                     bool always_update_field,
   6608                                                                     CpuRegister* temp1,
   6609                                                                     CpuRegister* temp2) {
   6610   DCHECK(kEmitCompilerReadBarrier);
   6611   DCHECK(kUseBakerReadBarrier);
   6612 
   6613   // In slow path based read barriers, the read barrier call is
   6614   // inserted after the original load. However, in fast path based
   6615   // Baker's read barriers, we need to perform the load of
   6616   // mirror::Object::monitor_ *before* the original reference load.
   6617   // This load-load ordering is required by the read barrier.
   6618   // The fast path/slow path (for Baker's algorithm) should look like:
   6619   //
   6620   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   6621   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   6622   //   HeapReference<Object> ref = *src;  // Original reference load.
   6623   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   6624   //   if (is_gray) {
   6625   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   6626   //   }
   6627   //
   6628   // Note: the original implementation in ReadBarrier::Barrier is
   6629   // slightly more complex as:
   6630   // - it implements the load-load fence using a data dependency on
   6631   //   the high-bits of rb_state, which are expected to be all zeroes
   6632   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
   6633   //   here, which is a no-op thanks to the x86-64 memory model);
   6634   // - it performs additional checks that we do not do here for
   6635   //   performance reasons.
   6636 
   6637   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
   6638   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   6639 
   6640   // Given the numeric representation, it's enough to check the low bit of the rb_state.
   6641   static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   6642   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   6643   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   6644   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   6645   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
   6646 
   6647   // if (rb_state == ReadBarrier::GrayState())
   6648   //   ref = ReadBarrier::Mark(ref);
   6649   // At this point, just do the "if" and make sure that flags are preserved until the branch.
   6650   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
   6651   if (needs_null_check) {
   6652     MaybeRecordImplicitNullCheck(instruction);
   6653   }
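           // Note: when an implicit null check is requested, it piggybacks on the testb above,
           // which is the first access through `obj` here and faults if `obj` is null.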
   6654 
   6655   // Load fence to prevent load-load reordering.
   6656   // Note that this is a no-op, thanks to the x86-64 memory model.
   6657   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   6658 
   6659   // The actual reference load.
   6660   // /* HeapReference<Object> */ ref = *src
   6661   __ movl(ref_reg, src);  // Flags are unaffected.
   6662 
   6663   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
   6664   // Slow path marking the object `ref` when it is gray.
   6665   SlowPathCode* slow_path;
   6666   if (always_update_field) {
   6667     DCHECK(temp1 != nullptr);
   6668     DCHECK(temp2 != nullptr);
   6669     slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
   6670         instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
   6671   } else {
   6672     slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
   6673         instruction, ref, /* unpoison_ref_before_marking */ true);
   6674   }
   6675   AddSlowPath(slow_path);
   6676 
   6677   // We have done the "if" of the gray bit check above, now branch based on the flags.
   6678   __ j(kNotZero, slow_path->GetEntryLabel());
   6679 
   6680   // Object* ref = ref_addr->AsMirrorPtr()
   6681   __ MaybeUnpoisonHeapReference(ref_reg);
   6682 
   6683   __ Bind(slow_path->GetExitLabel());
   6684 }
   6685 
   6686 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
   6687                                                   Location out,
   6688                                                   Location ref,
   6689                                                   Location obj,
   6690                                                   uint32_t offset,
   6691                                                   Location index) {
   6692   DCHECK(kEmitCompilerReadBarrier);
   6693 
   6694   // Insert a slow path based read barrier *after* the reference load.
   6695   //
   6696   // If heap poisoning is enabled, the unpoisoning of the loaded
   6697   // reference will be carried out by the runtime within the slow
   6698   // path.
   6699   //
   6700   // Note that `ref` currently does not get unpoisoned (when heap
   6701   // poisoning is enabled), which is alright as the `ref` argument is
   6702   // not used by the artReadBarrierSlow entry point.
   6703   //
   6704   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
   6705   SlowPathCode* slow_path = new (GetGraph()->GetArena())
   6706       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
   6707   AddSlowPath(slow_path);
   6708 
   6709   __ jmp(slow_path->GetEntryLabel());
   6710   __ Bind(slow_path->GetExitLabel());
   6711 }
   6712 
   6713 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
   6714                                                        Location out,
   6715                                                        Location ref,
   6716                                                        Location obj,
   6717                                                        uint32_t offset,
   6718                                                        Location index) {
   6719   if (kEmitCompilerReadBarrier) {
   6720     // Baker's read barriers shall be handled by the fast path
   6721     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
   6722     DCHECK(!kUseBakerReadBarrier);
   6723     // If heap poisoning is enabled, unpoisoning will be taken care of
   6724     // by the runtime within the slow path.
   6725     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   6726   } else if (kPoisonHeapReferences) {
   6727     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
   6728   }
   6729 }
   6730 
   6731 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
   6732                                                          Location out,
   6733                                                          Location root) {
   6734   DCHECK(kEmitCompilerReadBarrier);
   6735 
   6736   // Insert a slow path based read barrier *after* the GC root load.
   6737   //
   6738   // Note that GC roots are not affected by heap poisoning, so we do
   6739   // not need to do anything special for this here.
   6740   SlowPathCode* slow_path =
   6741       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
   6742   AddSlowPath(slow_path);
   6743 
   6744   __ jmp(slow_path->GetEntryLabel());
   6745   __ Bind(slow_path->GetExitLabel());
   6746 }
   6747 
   6748 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   6749   // Nothing to do, this should be removed during prepare for register allocator.
   6750   LOG(FATAL) << "Unreachable";
   6751 }
   6752 
   6753 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   6754   // Nothing to do, this should be removed during prepare for register allocator.
   6755   LOG(FATAL) << "Unreachable";
   6756 }
   6757 
   6758 // Simple implementation of packed switch - generate cascaded compare/jumps.
   6759 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   6760   LocationSummary* locations =
   6761       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   6762   locations->SetInAt(0, Location::RequiresRegister());
   6763   locations->AddTemp(Location::RequiresRegister());
   6764   locations->AddTemp(Location::RequiresRegister());
   6765 }
   6766 
   6767 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   6768   int32_t lower_bound = switch_instr->GetStartValue();
   6769   uint32_t num_entries = switch_instr->GetNumEntries();
   6770   LocationSummary* locations = switch_instr->GetLocations();
   6771   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   6772   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   6773   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
   6774   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   6775 
   6776   // Should we generate smaller inline compare/jumps?
   6777   if (num_entries <= kPackedSwitchJumpTableThreshold) {
   6778     // Figure out the correct compare values and jump conditions.
   6779     // Handle the first compare/branch as a special case because it might
   6780     // jump to the default case.
   6781     DCHECK_GT(num_entries, 2u);
   6782     Condition first_condition;
   6783     uint32_t index;
   6784     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   6785     if (lower_bound != 0) {
   6786       first_condition = kLess;
   6787       __ cmpl(value_reg_in, Immediate(lower_bound));
   6788       __ j(first_condition, codegen_->GetLabelOf(default_block));
   6789       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
   6790 
   6791       index = 1;
   6792     } else {
   6793       // Handle all the compare/jumps below.
   6794       first_condition = kBelow;
   6795       index = 0;
   6796     }
   6797 
   6798     // Handle the rest of the compare/jumps.
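             // Each iteration below dispatches two cases with a single cmpl against
             // case_value[index + 1]: the "less/below" branch selects successors[index] (the
             // value can no longer be below case_value[index] at this point), and the "equal"
             // branch selects successors[index + 1].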
   6799     for (; index + 1 < num_entries; index += 2) {
   6800       int32_t compare_to_value = lower_bound + index + 1;
   6801       __ cmpl(value_reg_in, Immediate(compare_to_value));
    6802       // Jump to successors[index] if value < case_value[index + 1] (i.e. value == case_value[index]).
   6803       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
   6804       // Jump to successors[index + 1] if value == case_value[index + 1].
   6805       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
   6806     }
   6807 
   6808     if (index != num_entries) {
   6809       // There are an odd number of entries. Handle the last one.
   6810       DCHECK_EQ(index + 1, num_entries);
   6811       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
   6812       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
   6813     }
   6814 
   6815     // And the default for any other value.
   6816     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
   6817       __ jmp(codegen_->GetLabelOf(default_block));
   6818     }
   6819     return;
   6820   }
   6821 
   6822   // Remove the bias, if needed.
   6823   Register value_reg_out = value_reg_in.AsRegister();
   6824   if (lower_bound != 0) {
   6825     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
   6826     value_reg_out = temp_reg.AsRegister();
   6827   }
   6828   CpuRegister value_reg(value_reg_out);
   6829 
   6830   // Is the value in range?
   6831   __ cmpl(value_reg, Immediate(num_entries - 1));
   6832   __ j(kAbove, codegen_->GetLabelOf(default_block));
   6833 
   6834   // We are in the range of the table.
   6835   // Load the address of the jump table in the constant area.
   6836   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
   6837 
   6838   // Load the (signed) offset from the jump table.
   6839   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
   6840 
   6841   // Add the offset to the address of the table base.
   6842   __ addq(temp_reg, base_reg);
   6843 
   6844   // And jump.
   6845   __ jmp(temp_reg);
   6846 }
   6847 
   6848 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
   6849   if (value == 0) {
   6850     __ xorl(dest, dest);
   6851   } else {
   6852     __ movl(dest, Immediate(value));
   6853   }
   6854 }
   6855 
   6856 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
   6857   if (value == 0) {
   6858     // Clears upper bits too.
   6859     __ xorl(dest, dest);
   6860   } else if (IsUint<32>(value)) {
   6861     // We can use a 32 bit move, as it will zero-extend and is shorter.
   6862     __ movl(dest, Immediate(static_cast<int32_t>(value)));
   6863   } else {
   6864     __ movq(dest, Immediate(value));
   6865   }
   6866 }
   6867 
   6868 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
   6869   if (value == 0) {
   6870     __ xorps(dest, dest);
   6871   } else {
   6872     __ movss(dest, LiteralInt32Address(value));
   6873   }
   6874 }
   6875 
   6876 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
   6877   if (value == 0) {
   6878     __ xorpd(dest, dest);
   6879   } else {
   6880     __ movsd(dest, LiteralInt64Address(value));
   6881   }
   6882 }
   6883 
   6884 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
   6885   Load32BitValue(dest, bit_cast<int32_t, float>(value));
   6886 }
   6887 
   6888 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
   6889   Load64BitValue(dest, bit_cast<int64_t, double>(value));
   6890 }
   6891 
   6892 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
   6893   if (value == 0) {
   6894     __ testl(dest, dest);
   6895   } else {
   6896     __ cmpl(dest, Immediate(value));
   6897   }
   6898 }
   6899 
   6900 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
   6901   if (IsInt<32>(value)) {
   6902     if (value == 0) {
   6903       __ testq(dest, dest);
   6904     } else {
   6905       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
   6906     }
   6907   } else {
    6908     // Value won't fit in a sign-extended 32-bit immediate.
   6909     __ cmpq(dest, LiteralInt64Address(value));
   6910   }
   6911 }
   6912 
   6913 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
   6914   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
   6915   GenerateIntCompare(lhs_reg, rhs);
   6916 }
   6917 
   6918 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
   6919   if (rhs.IsConstant()) {
   6920     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
   6921     Compare32BitValue(lhs, value);
   6922   } else if (rhs.IsStackSlot()) {
   6923     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   6924   } else {
   6925     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
   6926   }
   6927 }
   6928 
   6929 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
   6930   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
   6931   if (rhs.IsConstant()) {
   6932     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
   6933     Compare64BitValue(lhs_reg, value);
   6934   } else if (rhs.IsDoubleStackSlot()) {
   6935     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   6936   } else {
   6937     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
   6938   }
   6939 }
   6940 
   6941 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
   6942                                           Location index,
   6943                                           ScaleFactor scale,
   6944                                           uint32_t data_offset) {
   6945   return index.IsConstant() ?
   6946       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
   6947       Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
   6948 }
   6949 
   6950 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
   6951   DCHECK(dest.IsDoubleStackSlot());
   6952   if (IsInt<32>(value)) {
   6953     // Can move directly as an int32 constant.
   6954     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
   6955             Immediate(static_cast<int32_t>(value)));
   6956   } else {
   6957     Load64BitValue(CpuRegister(TMP), value);
   6958     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
   6959   }
   6960 }
   6961 
   6962 /**
   6963  * Class to handle late fixup of offsets into constant area.
   6964  */
   6965 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
   6966  public:
   6967   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
   6968       : codegen_(&codegen), offset_into_constant_area_(offset) {}
   6969 
   6970  protected:
   6971   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
   6972 
   6973   CodeGeneratorX86_64* codegen_;
   6974 
   6975  private:
   6976   void Process(const MemoryRegion& region, int pos) OVERRIDE {
   6977     // Patch the correct offset for the instruction.  We use the address of the
   6978     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
   6979     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
   6980     int32_t relative_position = constant_offset - pos;
   6981 
   6982     // Patch in the right value.
   6983     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
   6984   }
   6985 
   6986   // Location in constant area that the fixup refers to.
   6987   size_t offset_into_constant_area_;
   6988 };
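         // Note: a RIP-relative displacement is measured from the end of the instruction, which
         // is why Process() subtracts `pos` (the start of the next instruction) and patches the
         // four bytes immediately preceding it.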
   6989 
   6990 /**
    6991  * Class to handle late fixup of offsets to a jump table that will be created in the
   6992  * constant area.
   6993  */
   6994 class JumpTableRIPFixup : public RIPFixup {
   6995  public:
   6996   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
   6997       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
   6998 
   6999   void CreateJumpTable() {
   7000     X86_64Assembler* assembler = codegen_->GetAssembler();
   7001 
   7002     // Ensure that the reference to the jump table has the correct offset.
   7003     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
   7004     SetOffset(offset_in_constant_table);
   7005 
   7006     // Compute the offset from the start of the function to this jump table.
   7007     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
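             // The entries stored below are offsets relative to the start of the table itself;
             // VisitPackedSwitch loads one with movsxd and adds the table base back, so no
             // absolute addresses need to be emitted.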
   7008 
    7009     // Populate the jump table with the offsets to the case target blocks.
   7010     int32_t num_entries = switch_instr_->GetNumEntries();
   7011     HBasicBlock* block = switch_instr_->GetBlock();
   7012     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
   7013     // The value that we want is the target offset - the position of the table.
   7014     for (int32_t i = 0; i < num_entries; i++) {
   7015       HBasicBlock* b = successors[i];
   7016       Label* l = codegen_->GetLabelOf(b);
   7017       DCHECK(l->IsBound());
   7018       int32_t offset_to_block = l->Position() - current_table_offset;
   7019       assembler->AppendInt32(offset_to_block);
   7020     }
   7021   }
   7022 
   7023  private:
   7024   const HPackedSwitch* switch_instr_;
   7025 };
   7026 
   7027 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   7028   // Generate the constant area if needed.
   7029   X86_64Assembler* assembler = GetAssembler();
   7030   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
   7031     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
   7032     assembler->Align(4, 0);
   7033     constant_area_start_ = assembler->CodeSize();
   7034 
   7035     // Populate any jump tables.
   7036     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
   7037       jump_table->CreateJumpTable();
   7038     }
   7039 
   7040     // And now add the constant area to the generated code.
   7041     assembler->AddConstantArea();
   7042   }
   7043 
   7044   // And finish up.
   7045   CodeGenerator::Finalize(allocator);
   7046 }
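         // Note: the jump tables are populated after constant_area_start_ is fixed but before
         // AddConstantArea(), since CreateJumpTable() appends each table's entries to the pending
         // constant area and records the offset at which the table begins.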
   7047 
   7048 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
   7049   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
   7050   return Address::RIP(fixup);
   7051 }
   7052 
   7053 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
   7054   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
   7055   return Address::RIP(fixup);
   7056 }
   7057 
   7058 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
   7059   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
   7060   return Address::RIP(fixup);
   7061 }
   7062 
   7063 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
   7064   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
   7065   return Address::RIP(fixup);
   7066 }
   7067 
   7068 // TODO: trg as memory.
   7069 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   7070   if (!trg.IsValid()) {
   7071     DCHECK_EQ(type, Primitive::kPrimVoid);
   7072     return;
   7073   }
   7074 
   7075   DCHECK_NE(type, Primitive::kPrimVoid);
   7076 
   7077   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
   7078   if (trg.Equals(return_loc)) {
   7079     return;
   7080   }
   7081 
   7082   // Let the parallel move resolver take care of all of this.
   7083   HParallelMove parallel_move(GetGraph()->GetArena());
   7084   parallel_move.AddMove(return_loc, trg, type, nullptr);
   7085   GetMoveResolver()->EmitNativeCode(&parallel_move);
   7086 }
   7087 
   7088 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
   7089   // Create a fixup to be used to create and address the jump table.
   7090   JumpTableRIPFixup* table_fixup =
   7091       new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
   7092 
   7093   // We have to populate the jump tables.
   7094   fixups_to_jump_tables_.push_back(table_fixup);
   7095   return Address::RIP(table_fixup);
   7096 }
   7097 
   7098 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
   7099                                              const Address& addr_high,
   7100                                              int64_t v,
   7101                                              HInstruction* instruction) {
   7102   if (IsInt<32>(v)) {
   7103     int32_t v_32 = v;
   7104     __ movq(addr_low, Immediate(v_32));
   7105     MaybeRecordImplicitNullCheck(instruction);
   7106   } else {
    7107     // Doesn't fit in a single movq's sign-extended 32-bit immediate.  Do it in two 32-bit pieces.
   7108     int32_t low_v = Low32Bits(v);
   7109     int32_t high_v = High32Bits(v);
   7110     __ movl(addr_low, Immediate(low_v));
   7111     MaybeRecordImplicitNullCheck(instruction);
   7112     __ movl(addr_high, Immediate(high_v));
   7113   }
   7114 }
   7115 
   7116 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
   7117                                           const uint8_t* roots_data,
   7118                                           const PatchInfo<Label>& info,
   7119                                           uint64_t index_in_table) const {
   7120   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
   7121   uintptr_t address =
   7122       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
   7123   typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
   7124   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
   7125      dchecked_integral_cast<uint32_t>(address);
   7126 }
   7127 
   7128 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   7129   for (const PatchInfo<Label>& info : jit_string_patches_) {
   7130     const auto it = jit_string_roots_.find(
   7131         StringReference(&info.dex_file, dex::StringIndex(info.index)));
   7132     DCHECK(it != jit_string_roots_.end());
   7133     uint64_t index_in_table = it->second;
   7134     PatchJitRootUse(code, roots_data, info, index_in_table);
   7135   }
   7136 
   7137   for (const PatchInfo<Label>& info : jit_class_patches_) {
   7138     const auto it = jit_class_roots_.find(
   7139         TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
   7140     DCHECK(it != jit_class_roots_.end());
   7141     uint64_t index_in_table = it->second;
   7142     PatchJitRootUse(code, roots_data, info, index_in_table);
   7143   }
   7144 }
   7145 
   7146 #undef __
   7147 
   7148 }  // namespace x86_64
   7149 }  // namespace art
   7150