      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_x86_64.h"
     18 
     19 #include "art_method.h"
     20 #include "class_table.h"
     21 #include "code_generator_utils.h"
     22 #include "compiled_method.h"
     23 #include "entrypoints/quick/quick_entrypoints.h"
     24 #include "gc/accounting/card_table.h"
     25 #include "heap_poisoning.h"
     26 #include "intrinsics.h"
     27 #include "intrinsics_x86_64.h"
     28 #include "linker/linker_patch.h"
     29 #include "lock_word.h"
     30 #include "mirror/array-inl.h"
     31 #include "mirror/class-inl.h"
     32 #include "mirror/object_reference.h"
     33 #include "thread.h"
     34 #include "utils/assembler.h"
     35 #include "utils/stack_checks.h"
     36 #include "utils/x86_64/assembler_x86_64.h"
     37 #include "utils/x86_64/managed_register_x86_64.h"
     38 
     39 namespace art {
     40 
     41 template<class MirrorType>
     42 class GcRoot;
     43 
     44 namespace x86_64 {
     45 
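         // The current ArtMethod* is spilled at the bottom of the frame, i.e. at offset 0 from RSP.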
     46 static constexpr int kCurrentMethodStackOffset = 0;
     47 static constexpr Register kMethodRegisterArgument = RDI;
     48 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
      49 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
     50 // generates less code/data with a small num_entries.
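         // For example, at num_entries = 5 the compare/jump sequence costs roughly 7-8 instructions,
         // versus 7 instructions plus 5 jump table literals.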
     51 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
     52 
     53 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
     54 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
     55 
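         // C2 flag (bit 10) of the x87 FPU status word, used to detect an incomplete fprem result
         // in the floating-point remainder computation.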
     56 static constexpr int kC2ConditionMask = 0x400;
     57 
     58 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
     59 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
     60 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
     61 
     62 class NullCheckSlowPathX86_64 : public SlowPathCode {
     63  public:
     64   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
     65 
     66   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     67     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     68     __ Bind(GetEntryLabel());
     69     if (instruction_->CanThrowIntoCatchBlock()) {
     70       // Live registers will be restored in the catch block if caught.
     71       SaveLiveRegisters(codegen, instruction_->GetLocations());
     72     }
     73     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
     74                                   instruction_,
     75                                   instruction_->GetDexPc(),
     76                                   this);
     77     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
     78   }
     79 
     80   bool IsFatal() const OVERRIDE { return true; }
     81 
     82   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
     83 
     84  private:
     85   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
     86 };
     87 
     88 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
     89  public:
     90   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
     91 
     92   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     93     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     94     __ Bind(GetEntryLabel());
     95     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
     96     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
     97   }
     98 
     99   bool IsFatal() const OVERRIDE { return true; }
    100 
    101   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
    102 
    103  private:
    104   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
    105 };
    106 
    107 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
    108  public:
    109   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
    110       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
    111 
    112   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    113     __ Bind(GetEntryLabel());
    114     if (type_ == DataType::Type::kInt32) {
    115       if (is_div_) {
    116         __ negl(cpu_reg_);
    117       } else {
    118         __ xorl(cpu_reg_, cpu_reg_);
    119       }
    120 
    121     } else {
    122       DCHECK_EQ(DataType::Type::kInt64, type_);
    123       if (is_div_) {
    124         __ negq(cpu_reg_);
    125       } else {
    126         __ xorl(cpu_reg_, cpu_reg_);
    127       }
    128     }
    129     __ jmp(GetExitLabel());
    130   }
    131 
    132   const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
    133 
    134  private:
    135   const CpuRegister cpu_reg_;
    136   const DataType::Type type_;
    137   const bool is_div_;
    138   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
    139 };
    140 
    141 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
    142  public:
    143   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
    144       : SlowPathCode(instruction), successor_(successor) {}
    145 
    146   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    147     LocationSummary* locations = instruction_->GetLocations();
    148     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    149     __ Bind(GetEntryLabel());
    150     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    151     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    152     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    153     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    154     if (successor_ == nullptr) {
    155       __ jmp(GetReturnLabel());
    156     } else {
    157       __ jmp(x86_64_codegen->GetLabelOf(successor_));
    158     }
    159   }
    160 
    161   Label* GetReturnLabel() {
    162     DCHECK(successor_ == nullptr);
    163     return &return_label_;
    164   }
    165 
    166   HBasicBlock* GetSuccessor() const {
    167     return successor_;
    168   }
    169 
    170   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
    171 
    172  private:
    173   HBasicBlock* const successor_;
    174   Label return_label_;
    175 
    176   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
    177 };
    178 
    179 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
    180  public:
    181   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
    182     : SlowPathCode(instruction) {}
    183 
    184   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    185     LocationSummary* locations = instruction_->GetLocations();
    186     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    187     __ Bind(GetEntryLabel());
    188     if (instruction_->CanThrowIntoCatchBlock()) {
    189       // Live registers will be restored in the catch block if caught.
    190       SaveLiveRegisters(codegen, instruction_->GetLocations());
    191     }
    192     // Are we using an array length from memory?
    193     HInstruction* array_length = instruction_->InputAt(1);
    194     Location length_loc = locations->InAt(1);
    195     InvokeRuntimeCallingConvention calling_convention;
    196     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
    197       // Load the array length into our temporary.
    198       HArrayLength* length = array_length->AsArrayLength();
    199       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
    200       Location array_loc = array_length->GetLocations()->InAt(0);
    201       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
    202       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
    203       // Check for conflicts with index.
    204       if (length_loc.Equals(locations->InAt(0))) {
    205         // We know we aren't using parameter 2.
    206         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
    207       }
    208       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
    209       if (mirror::kUseStringCompression && length->IsStringLength()) {
    210         __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
    211       }
    212     }
    213 
    214     // We're moving two locations to locations that could overlap, so we need a parallel
    215     // move resolver.
    216     codegen->EmitParallelMoves(
    217         locations->InAt(0),
    218         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    219         DataType::Type::kInt32,
    220         length_loc,
    221         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    222         DataType::Type::kInt32);
    223     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
    224         ? kQuickThrowStringBounds
    225         : kQuickThrowArrayBounds;
    226     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    227     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    228     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
    229   }
    230 
    231   bool IsFatal() const OVERRIDE { return true; }
    232 
    233   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
    234 
    235  private:
    236   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
    237 };
    238 
    239 class LoadClassSlowPathX86_64 : public SlowPathCode {
    240  public:
    241   LoadClassSlowPathX86_64(HLoadClass* cls,
    242                           HInstruction* at,
    243                           uint32_t dex_pc,
    244                           bool do_clinit)
    245       : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    246     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    247   }
    248 
    249   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    250     LocationSummary* locations = instruction_->GetLocations();
    251     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    252     __ Bind(GetEntryLabel());
    253 
    254     SaveLiveRegisters(codegen, locations);
    255 
    256     // Custom calling convention: RAX serves as both input and output.
    257     __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
    258     x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
    259                                   instruction_,
    260                                   dex_pc_,
    261                                   this);
    262     if (do_clinit_) {
    263       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    264     } else {
    265       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    266     }
    267 
    268     Location out = locations->Out();
    269     // Move the class to the desired location.
    270     if (out.IsValid()) {
    271       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
    272       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    273     }
    274 
    275     RestoreLiveRegisters(codegen, locations);
    276     __ jmp(GetExitLabel());
    277   }
    278 
    279   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
    280 
    281  private:
    282   // The class this slow path will load.
    283   HLoadClass* const cls_;
    284 
     285   // The dex PC of the instruction requiring the class load.
    286   const uint32_t dex_pc_;
    287 
    288   // Whether to initialize the class.
    289   const bool do_clinit_;
    290 
    291   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
    292 };
    293 
    294 class LoadStringSlowPathX86_64 : public SlowPathCode {
    295  public:
    296   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
    297 
    298   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    299     LocationSummary* locations = instruction_->GetLocations();
    300     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    301 
    302     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    303     __ Bind(GetEntryLabel());
    304     SaveLiveRegisters(codegen, locations);
    305 
    306     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    307     // Custom calling convention: RAX serves as both input and output.
    308     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    309     x86_64_codegen->InvokeRuntime(kQuickResolveString,
    310                                   instruction_,
    311                                   instruction_->GetDexPc(),
    312                                   this);
    313     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    314     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    315     RestoreLiveRegisters(codegen, locations);
    316 
    317     __ jmp(GetExitLabel());
    318   }
    319 
    320   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
    321 
    322  private:
    323   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
    324 };
    325 
    326 class TypeCheckSlowPathX86_64 : public SlowPathCode {
    327  public:
    328   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
    329       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
    330 
    331   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    332     LocationSummary* locations = instruction_->GetLocations();
    333     uint32_t dex_pc = instruction_->GetDexPc();
    334     DCHECK(instruction_->IsCheckCast()
    335            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    336 
    337     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    338     __ Bind(GetEntryLabel());
    339 
    340     if (kPoisonHeapReferences &&
    341         instruction_->IsCheckCast() &&
    342         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
    343       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
    344       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
    345     }
    346 
    347     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
    348       SaveLiveRegisters(codegen, locations);
    349     }
    350 
    351     // We're moving two locations to locations that could overlap, so we need a parallel
    352     // move resolver.
    353     InvokeRuntimeCallingConvention calling_convention;
    354     codegen->EmitParallelMoves(locations->InAt(0),
    355                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    356                                DataType::Type::kReference,
    357                                locations->InAt(1),
    358                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    359                                DataType::Type::kReference);
    360     if (instruction_->IsInstanceOf()) {
    361       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
    362       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    363     } else {
    364       DCHECK(instruction_->IsCheckCast());
    365       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
    366       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    367     }
    368 
    369     if (!is_fatal_) {
    370       if (instruction_->IsInstanceOf()) {
    371         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    372       }
    373 
    374       RestoreLiveRegisters(codegen, locations);
    375       __ jmp(GetExitLabel());
    376     }
    377   }
    378 
    379   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
    380 
    381   bool IsFatal() const OVERRIDE { return is_fatal_; }
    382 
    383  private:
    384   const bool is_fatal_;
    385 
    386   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
    387 };
    388 
    389 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
    390  public:
    391   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
    392       : SlowPathCode(instruction) {}
    393 
    394   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    395     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    396     __ Bind(GetEntryLabel());
    397     LocationSummary* locations = instruction_->GetLocations();
    398     SaveLiveRegisters(codegen, locations);
    399     InvokeRuntimeCallingConvention calling_convention;
    400     x86_64_codegen->Load32BitValue(
    401         CpuRegister(calling_convention.GetRegisterAt(0)),
    402         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    403     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    404     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
    405   }
    406 
    407   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
    408 
    409  private:
    410   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
    411 };
    412 
    413 class ArraySetSlowPathX86_64 : public SlowPathCode {
    414  public:
    415   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
    416 
    417   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    418     LocationSummary* locations = instruction_->GetLocations();
    419     __ Bind(GetEntryLabel());
    420     SaveLiveRegisters(codegen, locations);
    421 
    422     InvokeRuntimeCallingConvention calling_convention;
    423     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    424     parallel_move.AddMove(
    425         locations->InAt(0),
    426         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    427         DataType::Type::kReference,
    428         nullptr);
    429     parallel_move.AddMove(
    430         locations->InAt(1),
    431         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    432         DataType::Type::kInt32,
    433         nullptr);
    434     parallel_move.AddMove(
    435         locations->InAt(2),
    436         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
    437         DataType::Type::kReference,
    438         nullptr);
    439     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    440 
    441     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    442     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    443     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    444     RestoreLiveRegisters(codegen, locations);
    445     __ jmp(GetExitLabel());
    446   }
    447 
    448   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
    449 
    450  private:
    451   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
    452 };
    453 
    454 // Slow path marking an object reference `ref` during a read
    455 // barrier. The field `obj.field` in the object `obj` holding this
    456 // reference does not get updated by this slow path after marking (see
    457 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
    458 //
    459 // This means that after the execution of this slow path, `ref` will
    460 // always be up-to-date, but `obj.field` may not; i.e., after the
    461 // flip, `ref` will be a to-space reference, but `obj.field` will
    462 // probably still be a from-space reference (unless it gets updated by
    463 // another thread, or if another thread installed another object
    464 // reference (different from `ref`) in `obj.field`).
    465 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
    466  public:
    467   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
    468                                 Location ref,
    469                                 bool unpoison_ref_before_marking)
    470       : SlowPathCode(instruction),
    471         ref_(ref),
    472         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    473     DCHECK(kEmitCompilerReadBarrier);
    474   }
    475 
    476   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
    477 
    478   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    479     LocationSummary* locations = instruction_->GetLocations();
    480     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    481     Register ref_reg = ref_cpu_reg.AsRegister();
    482     DCHECK(locations->CanCall());
    483     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    484     DCHECK(instruction_->IsInstanceFieldGet() ||
    485            instruction_->IsStaticFieldGet() ||
    486            instruction_->IsArrayGet() ||
    487            instruction_->IsArraySet() ||
    488            instruction_->IsLoadClass() ||
    489            instruction_->IsLoadString() ||
    490            instruction_->IsInstanceOf() ||
    491            instruction_->IsCheckCast() ||
    492            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
    493            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
    494         << "Unexpected instruction in read barrier marking slow path: "
    495         << instruction_->DebugName();
    496 
    497     __ Bind(GetEntryLabel());
    498     if (unpoison_ref_before_marking_) {
    499       // Object* ref = ref_addr->AsMirrorPtr()
    500       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    501     }
    502     // No need to save live registers; it's taken care of by the
    503     // entrypoint. Also, there is no need to update the stack mask,
    504     // as this runtime call will not trigger a garbage collection.
    505     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    506     DCHECK_NE(ref_reg, RSP);
    507     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    508     // "Compact" slow path, saving two moves.
    509     //
     510     // Instead of using the standard runtime calling convention (input
     511     // in RDI and output in RAX):
    512     //
    513     //   RDI <- ref
    514     //   RAX <- ReadBarrierMark(RDI)
    515     //   ref <- RAX
    516     //
    517     // we just use rX (the register containing `ref`) as input and output
    518     // of a dedicated entrypoint:
    519     //
    520     //   rX <- ReadBarrierMarkRegX(rX)
    521     //
    522     int32_t entry_point_offset =
    523         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    524     // This runtime call does not require a stack map.
    525     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    526     __ jmp(GetExitLabel());
    527   }
    528 
    529  private:
    530   // The location (register) of the marked object reference.
    531   const Location ref_;
    532   // Should the reference in `ref_` be unpoisoned prior to marking it?
    533   const bool unpoison_ref_before_marking_;
    534 
    535   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
    536 };
    537 
    538 // Slow path marking an object reference `ref` during a read barrier,
    539 // and if needed, atomically updating the field `obj.field` in the
    540 // object `obj` holding this reference after marking (contrary to
    541 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
    542 // `obj.field`).
    543 //
    544 // This means that after the execution of this slow path, both `ref`
    545 // and `obj.field` will be up-to-date; i.e., after the flip, both will
    546 // hold the same to-space reference (unless another thread installed
    547 // another object reference (different from `ref`) in `obj.field`).
    548 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
    549  public:
    550   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
    551                                               Location ref,
    552                                               CpuRegister obj,
    553                                               const Address& field_addr,
    554                                               bool unpoison_ref_before_marking,
    555                                               CpuRegister temp1,
    556                                               CpuRegister temp2)
    557       : SlowPathCode(instruction),
    558         ref_(ref),
    559         obj_(obj),
    560         field_addr_(field_addr),
    561         unpoison_ref_before_marking_(unpoison_ref_before_marking),
    562         temp1_(temp1),
    563         temp2_(temp2) {
    564     DCHECK(kEmitCompilerReadBarrier);
    565   }
    566 
    567   const char* GetDescription() const OVERRIDE {
    568     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
    569   }
    570 
    571   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    572     LocationSummary* locations = instruction_->GetLocations();
    573     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    574     Register ref_reg = ref_cpu_reg.AsRegister();
    575     DCHECK(locations->CanCall());
    576     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    577     // This slow path is only used by the UnsafeCASObject intrinsic.
    578     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
    579         << "Unexpected instruction in read barrier marking and field updating slow path: "
    580         << instruction_->DebugName();
    581     DCHECK(instruction_->GetLocations()->Intrinsified());
    582     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
    583 
    584     __ Bind(GetEntryLabel());
    585     if (unpoison_ref_before_marking_) {
    586       // Object* ref = ref_addr->AsMirrorPtr()
    587       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    588     }
    589 
    590     // Save the old (unpoisoned) reference.
    591     __ movl(temp1_, ref_cpu_reg);
    592 
    593     // No need to save live registers; it's taken care of by the
    594     // entrypoint. Also, there is no need to update the stack mask,
    595     // as this runtime call will not trigger a garbage collection.
    596     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    597     DCHECK_NE(ref_reg, RSP);
    598     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    599     // "Compact" slow path, saving two moves.
    600     //
     601     // Instead of using the standard runtime calling convention (input
     602     // in RDI and output in RAX):
    603     //
    604     //   RDI <- ref
    605     //   RAX <- ReadBarrierMark(RDI)
    606     //   ref <- RAX
    607     //
    608     // we just use rX (the register containing `ref`) as input and output
    609     // of a dedicated entrypoint:
    610     //
    611     //   rX <- ReadBarrierMarkRegX(rX)
    612     //
    613     int32_t entry_point_offset =
    614         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    615     // This runtime call does not require a stack map.
    616     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    617 
    618     // If the new reference is different from the old reference,
    619     // update the field in the holder (`*field_addr`).
    620     //
    621     // Note that this field could also hold a different object, if
    622     // another thread had concurrently changed it. In that case, the
    623     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    624     // operation below would abort the CAS, leaving the field as-is.
    625     NearLabel done;
    626     __ cmpl(temp1_, ref_cpu_reg);
    627     __ j(kEqual, &done);
    628 
     629     // Update the holder's field atomically.  This may fail if the
     630     // mutator updates it before us, but that is OK.  This is achieved
     631     // using a strong compare-and-set (CAS) operation with relaxed
     632     // memory synchronization ordering, where the expected value is
     633     // the old reference and the desired value is the new reference.
     634     // This operation is implemented with a 32-bit LOCK CMPXCHGL
    635     // instruction, which requires the expected value (the old
    636     // reference) to be in EAX.  Save RAX beforehand, and move the
    637     // expected value (stored in `temp1_`) into EAX.
    638     __ movq(temp2_, CpuRegister(RAX));
    639     __ movl(CpuRegister(RAX), temp1_);
    640 
    641     // Convenience aliases.
    642     CpuRegister base = obj_;
    643     CpuRegister expected = CpuRegister(RAX);
    644     CpuRegister value = ref_cpu_reg;
    645 
    646     bool base_equals_value = (base.AsRegister() == value.AsRegister());
    647     Register value_reg = ref_reg;
    648     if (kPoisonHeapReferences) {
    649       if (base_equals_value) {
    650         // If `base` and `value` are the same register location, move
    651         // `value_reg` to a temporary register.  This way, poisoning
    652         // `value_reg` won't invalidate `base`.
    653         value_reg = temp1_.AsRegister();
    654         __ movl(CpuRegister(value_reg), base);
    655       }
    656 
    657       // Check that the register allocator did not assign the location
    658       // of `expected` (RAX) to `value` nor to `base`, so that heap
    659       // poisoning (when enabled) works as intended below.
    660       // - If `value` were equal to `expected`, both references would
    661       //   be poisoned twice, meaning they would not be poisoned at
    662       //   all, as heap poisoning uses address negation.
    663       // - If `base` were equal to `expected`, poisoning `expected`
    664       //   would invalidate `base`.
    665       DCHECK_NE(value_reg, expected.AsRegister());
    666       DCHECK_NE(base.AsRegister(), expected.AsRegister());
    667 
    668       __ PoisonHeapReference(expected);
    669       __ PoisonHeapReference(CpuRegister(value_reg));
    670     }
    671 
    672     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
    673 
    674     // If heap poisoning is enabled, we need to unpoison the values
    675     // that were poisoned earlier.
    676     if (kPoisonHeapReferences) {
    677       if (base_equals_value) {
    678         // `value_reg` has been moved to a temporary register, no need
    679         // to unpoison it.
    680       } else {
    681         __ UnpoisonHeapReference(CpuRegister(value_reg));
    682       }
     683       // No need to unpoison `expected` (RAX), as it will be overwritten below.
    684     }
    685 
    686     // Restore RAX.
    687     __ movq(CpuRegister(RAX), temp2_);
    688 
    689     __ Bind(&done);
    690     __ jmp(GetExitLabel());
    691   }
    692 
    693  private:
    694   // The location (register) of the marked object reference.
    695   const Location ref_;
    696   // The register containing the object holding the marked object reference field.
    697   const CpuRegister obj_;
    698   // The address of the marked reference field.  The base of this address must be `obj_`.
    699   const Address field_addr_;
    700 
    701   // Should the reference in `ref_` be unpoisoned prior to marking it?
    702   const bool unpoison_ref_before_marking_;
    703 
    704   const CpuRegister temp1_;
    705   const CpuRegister temp2_;
    706 
    707   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
    708 };
    709 
    710 // Slow path generating a read barrier for a heap reference.
    711 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
    712  public:
    713   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
    714                                             Location out,
    715                                             Location ref,
    716                                             Location obj,
    717                                             uint32_t offset,
    718                                             Location index)
    719       : SlowPathCode(instruction),
    720         out_(out),
    721         ref_(ref),
    722         obj_(obj),
    723         offset_(offset),
    724         index_(index) {
    725     DCHECK(kEmitCompilerReadBarrier);
    726     // If `obj` is equal to `out` or `ref`, it means the initial
    727     // object has been overwritten by (or after) the heap object
    728     // reference load to be instrumented, e.g.:
    729     //
    730     //   __ movl(out, Address(out, offset));
    731     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    732     //
    733     // In that case, we have lost the information about the original
    734     // object, and the emitted read barrier cannot work properly.
    735     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    736     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
     737   }
    738 
    739   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    740     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    741     LocationSummary* locations = instruction_->GetLocations();
    742     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    743     DCHECK(locations->CanCall());
    744     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    745     DCHECK(instruction_->IsInstanceFieldGet() ||
    746            instruction_->IsStaticFieldGet() ||
    747            instruction_->IsArrayGet() ||
    748            instruction_->IsInstanceOf() ||
    749            instruction_->IsCheckCast() ||
    750            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
    751         << "Unexpected instruction in read barrier for heap reference slow path: "
    752         << instruction_->DebugName();
    753 
    754     __ Bind(GetEntryLabel());
    755     SaveLiveRegisters(codegen, locations);
    756 
    757     // We may have to change the index's value, but as `index_` is a
    758     // constant member (like other "inputs" of this slow path),
    759     // introduce a copy of it, `index`.
    760     Location index = index_;
    761     if (index_.IsValid()) {
    762       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
    763       if (instruction_->IsArrayGet()) {
     764         // Compute the real offset and store it in `index_reg`.
    765         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
    766         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
    767         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
    768           // We are about to change the value of `index_reg` (see the
    769           // calls to art::x86_64::X86_64Assembler::shll and
    770           // art::x86_64::X86_64Assembler::AddImmediate below), but it
    771           // has not been saved by the previous call to
    772           // art::SlowPathCode::SaveLiveRegisters, as it is a
    773           // callee-save register --
    774           // art::SlowPathCode::SaveLiveRegisters does not consider
    775           // callee-save registers, as it has been designed with the
    776           // assumption that callee-save registers are supposed to be
    777           // handled by the called function.  So, as a callee-save
    778           // register, `index_reg` _would_ eventually be saved onto
    779           // the stack, but it would be too late: we would have
    780           // changed its value earlier.  Therefore, we manually save
    781           // it here into another freely available register,
    782           // `free_reg`, chosen of course among the caller-save
    783           // registers (as a callee-save `free_reg` register would
    784           // exhibit the same problem).
    785           //
    786           // Note we could have requested a temporary register from
    787           // the register allocator instead; but we prefer not to, as
    788           // this is a slow path, and we know we can find a
    789           // caller-save register that is available.
    790           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
    791           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
    792           index_reg = free_reg;
    793           index = Location::RegisterLocation(index_reg);
    794         } else {
    795           // The initial register stored in `index_` has already been
    796           // saved in the call to art::SlowPathCode::SaveLiveRegisters
    797           // (as it is not a callee-save register), so we can freely
    798           // use it.
    799         }
    800         // Shifting the index value contained in `index_reg` by the
    801         // scale factor (2) cannot overflow in practice, as the
    802         // runtime is unable to allocate object arrays with a size
    803         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
    804         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
    805         static_assert(
    806             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
    807             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
    808         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
    809       } else {
    810         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
    811         // intrinsics, `index_` is not shifted by a scale factor of 2
    812         // (as in the case of ArrayGet), as it is actually an offset
    813         // to an object field within an object.
    814         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
    815         DCHECK(instruction_->GetLocations()->Intrinsified());
    816         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
    817                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
    818             << instruction_->AsInvoke()->GetIntrinsic();
    819         DCHECK_EQ(offset_, 0U);
    820         DCHECK(index_.IsRegister());
    821       }
    822     }
    823 
    824     // We're moving two or three locations to locations that could
    825     // overlap, so we need a parallel move resolver.
    826     InvokeRuntimeCallingConvention calling_convention;
    827     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    828     parallel_move.AddMove(ref_,
    829                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    830                           DataType::Type::kReference,
    831                           nullptr);
    832     parallel_move.AddMove(obj_,
    833                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    834                           DataType::Type::kReference,
    835                           nullptr);
    836     if (index.IsValid()) {
    837       parallel_move.AddMove(index,
    838                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
    839                             DataType::Type::kInt32,
    840                             nullptr);
    841       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    842     } else {
    843       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    844       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    845     }
    846     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
    847                                   instruction_,
    848                                   instruction_->GetDexPc(),
    849                                   this);
    850     CheckEntrypointTypes<
    851         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    852     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
    853 
    854     RestoreLiveRegisters(codegen, locations);
    855     __ jmp(GetExitLabel());
    856   }
    857 
    858   const char* GetDescription() const OVERRIDE {
    859     return "ReadBarrierForHeapReferenceSlowPathX86_64";
    860   }
    861 
    862  private:
    863   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    864     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    865     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    866     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
    867       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
    868         return static_cast<CpuRegister>(i);
    869       }
    870     }
    871     // We shall never fail to find a free caller-save register, as
    872     // there are more than two core caller-save registers on x86-64
    873     // (meaning it is possible to find one which is different from
    874     // `ref` and `obj`).
    875     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    876     LOG(FATAL) << "Could not find a free caller-save register";
    877     UNREACHABLE();
    878   }
    879 
    880   const Location out_;
    881   const Location ref_;
    882   const Location obj_;
    883   const uint32_t offset_;
    884   // An additional location containing an index to an array.
    885   // Only used for HArrayGet and the UnsafeGetObject &
    886   // UnsafeGetObjectVolatile intrinsics.
    887   const Location index_;
    888 
    889   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
    890 };
    891 
    892 // Slow path generating a read barrier for a GC root.
    893 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
    894  public:
    895   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
    896       : SlowPathCode(instruction), out_(out), root_(root) {
    897     DCHECK(kEmitCompilerReadBarrier);
    898   }
    899 
    900   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    901     LocationSummary* locations = instruction_->GetLocations();
    902     DCHECK(locations->CanCall());
    903     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    904     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
    905         << "Unexpected instruction in read barrier for GC root slow path: "
    906         << instruction_->DebugName();
    907 
    908     __ Bind(GetEntryLabel());
    909     SaveLiveRegisters(codegen, locations);
    910 
    911     InvokeRuntimeCallingConvention calling_convention;
    912     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    913     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    914     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
    915                                   instruction_,
    916                                   instruction_->GetDexPc(),
    917                                   this);
    918     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    919     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
    920 
    921     RestoreLiveRegisters(codegen, locations);
    922     __ jmp(GetExitLabel());
    923   }
    924 
    925   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
    926 
    927  private:
    928   const Location out_;
    929   const Location root_;
    930 
    931   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
    932 };
    933 
    934 #undef __
    935 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
    936 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
    937 
    938 inline Condition X86_64IntegerCondition(IfCondition cond) {
    939   switch (cond) {
    940     case kCondEQ: return kEqual;
    941     case kCondNE: return kNotEqual;
    942     case kCondLT: return kLess;
    943     case kCondLE: return kLessEqual;
    944     case kCondGT: return kGreater;
    945     case kCondGE: return kGreaterEqual;
    946     case kCondB:  return kBelow;
    947     case kCondBE: return kBelowEqual;
    948     case kCondA:  return kAbove;
    949     case kCondAE: return kAboveEqual;
    950   }
    951   LOG(FATAL) << "Unreachable";
    952   UNREACHABLE();
    953 }
    954 
    955 // Maps FP condition to x86_64 name.
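         // The unsigned condition codes are used because ucomiss/ucomisd set ZF/CF like an unsigned
         // compare; an unordered result (NaN) sets ZF, PF and CF and is handled separately by callers.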
    956 inline Condition X86_64FPCondition(IfCondition cond) {
    957   switch (cond) {
    958     case kCondEQ: return kEqual;
    959     case kCondNE: return kNotEqual;
    960     case kCondLT: return kBelow;
    961     case kCondLE: return kBelowEqual;
    962     case kCondGT: return kAbove;
    963     case kCondGE: return kAboveEqual;
    964     default:      break;  // should not happen
    965   }
    966   LOG(FATAL) << "Unreachable";
    967   UNREACHABLE();
    968 }
    969 
    970 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    971       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    972       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
    973   return desired_dispatch_info;
    974 }
    975 
    976 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    977     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
    978   // All registers are assumed to be correctly set up.
    979 
    980   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
    981   switch (invoke->GetMethodLoadKind()) {
    982     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
    983       // temp = thread->string_init_entrypoint
    984       uint32_t offset =
    985           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
    986       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
    987       break;
    988     }
    989     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
    990       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
    991       break;
    992     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
    993       DCHECK(GetCompilerOptions().IsBootImage());
    994       __ leal(temp.AsRegister<CpuRegister>(),
    995               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
    996       RecordBootImageMethodPatch(invoke);
    997       break;
    998     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
    999       Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
   1000       break;
   1001     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
   1002       __ movq(temp.AsRegister<CpuRegister>(),
   1003               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
   1004       RecordMethodBssEntryPatch(invoke);
   1005       break;
   1006     }
   1007     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
   1008       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
   1009       return;  // No code pointer retrieval; the runtime performs the call directly.
   1010     }
   1011   }
   1012 
   1013   switch (invoke->GetCodePtrLocation()) {
   1014     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
   1015       __ call(&frame_entry_label_);
   1016       break;
   1017     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
   1018       // (callee_method + offset_of_quick_compiled_code)()
   1019       __ call(Address(callee_method.AsRegister<CpuRegister>(),
   1020                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
   1021                           kX86_64PointerSize).SizeValue()));
   1022       break;
   1023   }
   1024   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   1025 
   1026   DCHECK(!IsLeafMethod());
   1027 }
   1028 
   1029 void CodeGeneratorX86_64::GenerateVirtualCall(
   1030     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
   1031   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   1032   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
   1033       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
   1034 
   1035   // Use the calling convention instead of the location of the receiver, as
   1036   // intrinsics may have put the receiver in a different register. In the intrinsics
   1037   // slow path, the arguments have been moved to the right place, so here we are
   1038   // guaranteed that the receiver is the first register of the calling convention.
   1039   InvokeDexCallingConvention calling_convention;
   1040   Register receiver = calling_convention.GetRegisterAt(0);
   1041 
   1042   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
   1043   // /* HeapReference<Class> */ temp = receiver->klass_
   1044   __ movl(temp, Address(CpuRegister(receiver), class_offset));
   1045   MaybeRecordImplicitNullCheck(invoke);
   1046   // Instead of simply (possibly) unpoisoning `temp` here, we should
   1047   // emit a read barrier for the previous class reference load.
   1048   // However this is not required in practice, as this is an
   1049   // intermediate/temporary reference and because the current
   1050   // concurrent copying collector keeps the from-space memory
   1051   // intact/accessible until the end of the marking phase (the
    1052   // concurrent copying collector may not do so in the future).
   1053   __ MaybeUnpoisonHeapReference(temp);
   1054   // temp = temp->GetMethodAt(method_offset);
   1055   __ movq(temp, Address(temp, method_offset));
   1056   // call temp->GetEntryPoint();
   1057   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
   1058       kX86_64PointerSize).SizeValue()));
   1059   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   1060 }
   1061 
   1062 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
   1063   boot_image_method_patches_.emplace_back(
   1064       invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
   1065   __ Bind(&boot_image_method_patches_.back().label);
   1066 }
   1067 
   1068 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
   1069   method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
   1070   __ Bind(&method_bss_entry_patches_.back().label);
   1071 }
   1072 
   1073 void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
   1074   boot_image_type_patches_.emplace_back(
   1075       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
   1076   __ Bind(&boot_image_type_patches_.back().label);
   1077 }
   1078 
   1079 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
   1080   type_bss_entry_patches_.emplace_back(
   1081       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
   1082   return &type_bss_entry_patches_.back().label;
   1083 }
   1084 
   1085 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
   1086   boot_image_string_patches_.emplace_back(
   1087       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
   1088   __ Bind(&boot_image_string_patches_.back().label);
   1089 }
   1090 
   1091 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
   1092   DCHECK(!GetCompilerOptions().IsBootImage());
   1093   string_bss_entry_patches_.emplace_back(
   1094       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
   1095   return &string_bss_entry_patches_.back().label;
   1096 }
   1097 
    1098 // The label points to the end of the "movl" or another instruction, but the literal offset
    1099 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
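         // For example, a RIP-relative movl ends with a 4-byte displacement, so the label bound right
         // after the instruction is 4 bytes past the start of the value that needs to be patched.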
   1100 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
   1101 
   1102 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
   1103 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
   1104     const ArenaDeque<PatchInfo<Label>>& infos,
   1105     ArenaVector<linker::LinkerPatch>* linker_patches) {
   1106   for (const PatchInfo<Label>& info : infos) {
   1107     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
   1108     linker_patches->push_back(
   1109         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
   1110   }
   1111 }
   1112 
   1113 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
   1114   DCHECK(linker_patches->empty());
   1115   size_t size =
   1116       boot_image_method_patches_.size() +
   1117       method_bss_entry_patches_.size() +
   1118       boot_image_type_patches_.size() +
   1119       type_bss_entry_patches_.size() +
   1120       boot_image_string_patches_.size() +
   1121       string_bss_entry_patches_.size();
   1122   linker_patches->reserve(size);
   1123   if (GetCompilerOptions().IsBootImage()) {
   1124     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
   1125         boot_image_method_patches_, linker_patches);
   1126     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
   1127         boot_image_type_patches_, linker_patches);
   1128     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
   1129         boot_image_string_patches_, linker_patches);
   1130   } else {
   1131     DCHECK(boot_image_method_patches_.empty());
   1132     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
   1133         boot_image_type_patches_, linker_patches);
   1134     EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
   1135         boot_image_string_patches_, linker_patches);
   1136   }
   1137   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
   1138       method_bss_entry_patches_, linker_patches);
   1139   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
   1140       type_bss_entry_patches_, linker_patches);
   1141   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
   1142       string_bss_entry_patches_, linker_patches);
   1143   DCHECK_EQ(size, linker_patches->size());
   1144 }
   1145 
   1146 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   1147   stream << Register(reg);
   1148 }
   1149 
   1150 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
   1151   stream << FloatRegister(reg);
   1152 }
   1153 
   1154 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   1155   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
   1156   return kX86_64WordSize;
   1157 }
   1158 
   1159 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
   1160   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
   1161   return kX86_64WordSize;
   1162 }
   1163 
   1164 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   1165   if (GetGraph()->HasSIMD()) {
   1166     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
   1167   } else {
   1168     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
   1169   }
   1170   return GetFloatingPointSpillSlotSize();
   1171 }
   1172 
   1173 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   1174   if (GetGraph()->HasSIMD()) {
   1175     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
   1176   } else {
   1177     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
   1178   }
   1179   return GetFloatingPointSpillSlotSize();
   1180 }
   1181 
   1182 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
   1183                                         HInstruction* instruction,
   1184                                         uint32_t dex_pc,
   1185                                         SlowPathCode* slow_path) {
   1186   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
   1187   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
   1188   if (EntrypointRequiresStackMap(entrypoint)) {
   1189     RecordPcInfo(instruction, dex_pc, slow_path);
   1190   }
   1191 }
   1192 
   1193 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
   1194                                                               HInstruction* instruction,
   1195                                                               SlowPathCode* slow_path) {
   1196   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
   1197   GenerateInvokeRuntime(entry_point_offset);
   1198 }
   1199 
   1200 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
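  // The Quick entrypoints live in a table at a fixed offset from the Thread object;
  // on x86-64 the managed runtime reaches Thread::Current() through the GS segment,
  // hence the gs()-prefixed absolute (non-RIP-relative) call below.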
   1201   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
   1202 }
   1203 
   1204 static constexpr int kNumberOfCpuRegisterPairs = 0;
   1205 // Use a fake return address register to mimic Quick.
   1206 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
   1207 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
   1208                                          const X86_64InstructionSetFeatures& isa_features,
   1209                                          const CompilerOptions& compiler_options,
   1210                                          OptimizingCompilerStats* stats)
   1211       : CodeGenerator(graph,
   1212                       kNumberOfCpuRegisters,
   1213                       kNumberOfFloatRegisters,
   1214                       kNumberOfCpuRegisterPairs,
   1215                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
   1216                                           arraysize(kCoreCalleeSaves))
   1217                           | (1 << kFakeReturnRegister),
   1218                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
   1219                                           arraysize(kFpuCalleeSaves)),
   1220                       compiler_options,
   1221                       stats),
   1222         block_labels_(nullptr),
   1223         location_builder_(graph, this),
   1224         instruction_visitor_(graph, this),
   1225         move_resolver_(graph->GetAllocator(), this),
   1226         assembler_(graph->GetAllocator()),
   1227         isa_features_(isa_features),
   1228         constant_area_start_(0),
   1229         boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
   1230         method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
   1231         boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
   1232         type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
   1233         boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
   1234         string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
   1235         jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
   1236         jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
   1237         fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
   1238   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
   1239 }
   1240 
   1241 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
   1242                                                                CodeGeneratorX86_64* codegen)
   1243       : InstructionCodeGenerator(graph, codegen),
   1244         assembler_(codegen->GetAssembler()),
   1245         codegen_(codegen) {}
   1246 
   1247 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
   1248   // Stack register is always reserved.
   1249   blocked_core_registers_[RSP] = true;
   1250 
   1251   // Block the register used as TMP.
   1252   blocked_core_registers_[TMP] = true;
   1253 }
   1254 
   1255 static dwarf::Reg DWARFReg(Register reg) {
   1256   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
   1257 }
   1258 
   1259 static dwarf::Reg DWARFReg(FloatRegister reg) {
   1260   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
   1261 }
   1262 
   1263 void CodeGeneratorX86_64::GenerateFrameEntry() {
   1264   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
   1265   __ Bind(&frame_entry_label_);
   1266   bool skip_overflow_check = IsLeafMethod()
   1267       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
   1268   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
   1269 
   1270   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
   1271     __ addw(Address(CpuRegister(kMethodRegisterArgument),
   1272                     ArtMethod::HotnessCountOffset().Int32Value()),
   1273             Immediate(1));
   1274   }
   1275 
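  // Implicit stack overflow check: read the word `reserved_bytes` below RSP. If the
  // stack cannot grow that far, the access hits a protected page and faults; the
  // fault handler uses the stack map recorded below to throw StackOverflowError.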
   1276   if (!skip_overflow_check) {
   1277     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
   1278     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
   1279     RecordPcInfo(nullptr, 0);
   1280   }
   1281 
   1282   if (HasEmptyFrame()) {
   1283     return;
   1284   }
   1285 
   1286   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
   1287     Register reg = kCoreCalleeSaves[i];
   1288     if (allocated_registers_.ContainsCoreRegister(reg)) {
   1289       __ pushq(CpuRegister(reg));
   1290       __ cfi().AdjustCFAOffset(kX86_64WordSize);
   1291       __ cfi().RelOffset(DWARFReg(reg), 0);
   1292     }
   1293   }
   1294 
   1295   int adjust = GetFrameSize() - GetCoreSpillSize();
   1296   __ subq(CpuRegister(RSP), Immediate(adjust));
   1297   __ cfi().AdjustCFAOffset(adjust);
   1298   uint32_t xmm_spill_location = GetFpuSpillStart();
   1299   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
   1300 
   1301   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
   1302     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
   1303       int offset = xmm_spill_location + (xmm_spill_slot_size * i);
   1304       __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
   1305       __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
   1306     }
   1307   }
   1308 
   1309   // Save the current method if we need it. Note that we do not
   1310   // do this in HCurrentMethod, as the instruction might have been removed
   1311   // in the SSA graph.
   1312   if (RequiresCurrentMethod()) {
   1313     __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
   1314             CpuRegister(kMethodRegisterArgument));
   1315   }
   1316 
   1317   if (GetGraph()->HasShouldDeoptimizeFlag()) {
   1318     // Initialize should_deoptimize flag to 0.
   1319     __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
   1320   }
   1321 }
   1322 
   1323 void CodeGeneratorX86_64::GenerateFrameExit() {
   1324   __ cfi().RememberState();
   1325   if (!HasEmptyFrame()) {
   1326     uint32_t xmm_spill_location = GetFpuSpillStart();
   1327     size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
   1328     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
   1329       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
   1330         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
   1331         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
   1332         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
   1333       }
   1334     }
   1335 
   1336     int adjust = GetFrameSize() - GetCoreSpillSize();
   1337     __ addq(CpuRegister(RSP), Immediate(adjust));
   1338     __ cfi().AdjustCFAOffset(-adjust);
   1339 
   1340     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
   1341       Register reg = kCoreCalleeSaves[i];
   1342       if (allocated_registers_.ContainsCoreRegister(reg)) {
   1343         __ popq(CpuRegister(reg));
   1344         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
   1345         __ cfi().Restore(DWARFReg(reg));
   1346       }
   1347     }
   1348   }
   1349   __ ret();
   1350   __ cfi().RestoreState();
   1351   __ cfi().DefCFAOffset(GetFrameSize());
   1352 }
   1353 
   1354 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
   1355   __ Bind(GetLabelOf(block));
   1356 }
   1357 
   1358 void CodeGeneratorX86_64::Move(Location destination, Location source) {
   1359   if (source.Equals(destination)) {
   1360     return;
   1361   }
   1362   if (destination.IsRegister()) {
   1363     CpuRegister dest = destination.AsRegister<CpuRegister>();
   1364     if (source.IsRegister()) {
   1365       __ movq(dest, source.AsRegister<CpuRegister>());
   1366     } else if (source.IsFpuRegister()) {
   1367       __ movd(dest, source.AsFpuRegister<XmmRegister>());
   1368     } else if (source.IsStackSlot()) {
   1369       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1370     } else if (source.IsConstant()) {
   1371       HConstant* constant = source.GetConstant();
   1372       if (constant->IsLongConstant()) {
   1373         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
   1374       } else {
   1375         Load32BitValue(dest, GetInt32ValueOf(constant));
   1376       }
   1377     } else {
   1378       DCHECK(source.IsDoubleStackSlot());
   1379       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1380     }
   1381   } else if (destination.IsFpuRegister()) {
   1382     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   1383     if (source.IsRegister()) {
   1384       __ movd(dest, source.AsRegister<CpuRegister>());
   1385     } else if (source.IsFpuRegister()) {
   1386       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
   1387     } else if (source.IsConstant()) {
   1388       HConstant* constant = source.GetConstant();
   1389       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
   1390       if (constant->IsFloatConstant()) {
   1391         Load32BitValue(dest, static_cast<int32_t>(value));
   1392       } else {
   1393         Load64BitValue(dest, value);
   1394       }
   1395     } else if (source.IsStackSlot()) {
   1396       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1397     } else {
   1398       DCHECK(source.IsDoubleStackSlot());
   1399       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1400     }
   1401   } else if (destination.IsStackSlot()) {
   1402     if (source.IsRegister()) {
   1403       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1404               source.AsRegister<CpuRegister>());
   1405     } else if (source.IsFpuRegister()) {
   1406       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1407                source.AsFpuRegister<XmmRegister>());
   1408     } else if (source.IsConstant()) {
   1409       HConstant* constant = source.GetConstant();
   1410       int32_t value = GetInt32ValueOf(constant);
   1411       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
   1412     } else {
   1413       DCHECK(source.IsStackSlot()) << source;
   1414       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   1415       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   1416     }
   1417   } else {
   1418     DCHECK(destination.IsDoubleStackSlot());
   1419     if (source.IsRegister()) {
   1420       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1421               source.AsRegister<CpuRegister>());
   1422     } else if (source.IsFpuRegister()) {
   1423       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1424                source.AsFpuRegister<XmmRegister>());
   1425     } else if (source.IsConstant()) {
   1426       HConstant* constant = source.GetConstant();
   1427       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
   1428       int64_t value = GetInt64ValueOf(constant);
   1429       Store64BitValueToStack(destination, value);
   1430     } else {
   1431       DCHECK(source.IsDoubleStackSlot());
   1432       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   1433       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   1434     }
   1435   }
   1436 }
   1437 
   1438 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
   1439   DCHECK(location.IsRegister());
   1440   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
   1441 }
   1442 
   1443 void CodeGeneratorX86_64::MoveLocation(
   1444     Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
   1445   Move(dst, src);
   1446 }
   1447 
   1448 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
   1449   if (location.IsRegister()) {
   1450     locations->AddTemp(location);
   1451   } else {
   1452     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
   1453   }
   1454 }
   1455 
   1456 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   1457   if (successor->IsExitBlock()) {
   1458     DCHECK(got->GetPrevious()->AlwaysThrows());
   1459     return;  // no code needed
   1460   }
   1461 
   1462   HBasicBlock* block = got->GetBlock();
   1463   HInstruction* previous = got->GetPrevious();
   1464 
   1465   HLoopInformation* info = block->GetLoopInformation();
   1466   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
   1467     if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
   1468       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
   1469       __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
   1470               Immediate(1));
   1471     }
   1472     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
   1473     return;
   1474   }
   1475 
   1476   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
   1477     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
   1478   }
   1479   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
   1480     __ jmp(codegen_->GetLabelOf(successor));
   1481   }
   1482 }
   1483 
   1484 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
   1485   got->SetLocations(nullptr);
   1486 }
   1487 
   1488 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
   1489   HandleGoto(got, got->GetSuccessor());
   1490 }
   1491 
   1492 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
   1493   try_boundary->SetLocations(nullptr);
   1494 }
   1495 
   1496 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
   1497   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
   1498   if (!successor->IsExitBlock()) {
   1499     HandleGoto(try_boundary, successor);
   1500   }
   1501 }
   1502 
   1503 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
   1504   exit->SetLocations(nullptr);
   1505 }
   1506 
   1507 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
   1508 }
   1509 
   1510 template<class LabelType>
   1511 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
   1512                                                      LabelType* true_label,
   1513                                                      LabelType* false_label) {
   1514   if (cond->IsFPConditionTrueIfNaN()) {
   1515     __ j(kUnordered, true_label);
   1516   } else if (cond->IsFPConditionFalseIfNaN()) {
   1517     __ j(kUnordered, false_label);
   1518   }
   1519   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
   1520 }
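
// A hypothetical, standalone sketch (not ART code) of what the jump sequence above
// computes for a single float comparison: an unordered result (either operand is
// NaN) is routed to the true or false target according to the condition's NaN bias,
// and only ordered inputs reach the X86_64FPCondition test. The ordered less-than
// below merely stands in for whichever condition is being compiled.
inline bool SketchFPConditionWithNaNBias(float lhs, float rhs, bool true_if_nan) {
  if (lhs != lhs || rhs != rhs) {  // ucomiss reports "unordered" when either input is NaN.
    return true_if_nan;            // j(kUnordered, true_label) vs. j(kUnordered, false_label).
  }
  return lhs < rhs;                // Ordered case: the X86_64FPCondition jump.
}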
   1521 
   1522 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
   1523   LocationSummary* locations = condition->GetLocations();
   1524 
   1525   Location left = locations->InAt(0);
   1526   Location right = locations->InAt(1);
   1527   DataType::Type type = condition->InputAt(0)->GetType();
   1528   switch (type) {
   1529     case DataType::Type::kBool:
   1530     case DataType::Type::kUint8:
   1531     case DataType::Type::kInt8:
   1532     case DataType::Type::kUint16:
   1533     case DataType::Type::kInt16:
   1534     case DataType::Type::kInt32:
   1535     case DataType::Type::kReference: {
   1536       codegen_->GenerateIntCompare(left, right);
   1537       break;
   1538     }
   1539     case DataType::Type::kInt64: {
   1540       codegen_->GenerateLongCompare(left, right);
   1541       break;
   1542     }
   1543     case DataType::Type::kFloat32: {
   1544       if (right.IsFpuRegister()) {
   1545         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
   1546       } else if (right.IsConstant()) {
   1547         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
   1548                    codegen_->LiteralFloatAddress(
   1549                      right.GetConstant()->AsFloatConstant()->GetValue()));
   1550       } else {
   1551         DCHECK(right.IsStackSlot());
   1552         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
   1553                    Address(CpuRegister(RSP), right.GetStackIndex()));
   1554       }
   1555       break;
   1556     }
   1557     case DataType::Type::kFloat64: {
   1558       if (right.IsFpuRegister()) {
   1559         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
   1560       } else if (right.IsConstant()) {
   1561         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
   1562                    codegen_->LiteralDoubleAddress(
   1563                      right.GetConstant()->AsDoubleConstant()->GetValue()));
   1564       } else {
   1565         DCHECK(right.IsDoubleStackSlot());
   1566         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
   1567                    Address(CpuRegister(RSP), right.GetStackIndex()));
   1568       }
   1569       break;
   1570     }
   1571     default:
   1572       LOG(FATAL) << "Unexpected condition type " << type;
   1573   }
   1574 }
   1575 
   1576 template<class LabelType>
   1577 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
   1578                                                                   LabelType* true_target_in,
   1579                                                                   LabelType* false_target_in) {
   1580   // Generated branching requires both targets to be explicit. If either of the
    1581   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
   1582   LabelType fallthrough_target;
   1583   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
   1584   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
   1585 
   1586   // Generate the comparison to set the CC.
   1587   GenerateCompareTest(condition);
   1588 
   1589   // Now generate the correct jump(s).
   1590   DataType::Type type = condition->InputAt(0)->GetType();
   1591   switch (type) {
   1592     case DataType::Type::kInt64: {
   1593       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
   1594       break;
   1595     }
   1596     case DataType::Type::kFloat32: {
   1597       GenerateFPJumps(condition, true_target, false_target);
   1598       break;
   1599     }
   1600     case DataType::Type::kFloat64: {
   1601       GenerateFPJumps(condition, true_target, false_target);
   1602       break;
   1603     }
   1604     default:
   1605       LOG(FATAL) << "Unexpected condition type " << type;
   1606   }
   1607 
   1608   if (false_target != &fallthrough_target) {
   1609     __ jmp(false_target);
   1610   }
   1611 
   1612   if (fallthrough_target.IsLinked()) {
   1613     __ Bind(&fallthrough_target);
   1614   }
   1615 }
   1616 
   1617 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
   1618   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
   1619   // are set only strictly before `branch`. We can't use the eflags on long
   1620   // conditions if they are materialized due to the complex branching.
   1621   return cond->IsCondition() &&
   1622          cond->GetNext() == branch &&
   1623          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
   1624 }
   1625 
   1626 template<class LabelType>
   1627 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
   1628                                                            size_t condition_input_index,
   1629                                                            LabelType* true_target,
   1630                                                            LabelType* false_target) {
   1631   HInstruction* cond = instruction->InputAt(condition_input_index);
   1632 
   1633   if (true_target == nullptr && false_target == nullptr) {
   1634     // Nothing to do. The code always falls through.
   1635     return;
   1636   } else if (cond->IsIntConstant()) {
   1637     // Constant condition, statically compared against "true" (integer value 1).
   1638     if (cond->AsIntConstant()->IsTrue()) {
   1639       if (true_target != nullptr) {
   1640         __ jmp(true_target);
   1641       }
   1642     } else {
   1643       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
   1644       if (false_target != nullptr) {
   1645         __ jmp(false_target);
   1646       }
   1647     }
   1648     return;
   1649   }
   1650 
   1651   // The following code generates these patterns:
   1652   //  (1) true_target == nullptr && false_target != nullptr
   1653   //        - opposite condition true => branch to false_target
   1654   //  (2) true_target != nullptr && false_target == nullptr
   1655   //        - condition true => branch to true_target
   1656   //  (3) true_target != nullptr && false_target != nullptr
   1657   //        - condition true => branch to true_target
   1658   //        - branch to false_target
   1659   if (IsBooleanValueOrMaterializedCondition(cond)) {
   1660     if (AreEflagsSetFrom(cond, instruction)) {
   1661       if (true_target == nullptr) {
   1662         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
   1663       } else {
   1664         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
   1665       }
   1666     } else {
   1667       // Materialized condition, compare against 0.
   1668       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
   1669       if (lhs.IsRegister()) {
   1670         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
   1671       } else {
   1672         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
   1673       }
   1674       if (true_target == nullptr) {
   1675         __ j(kEqual, false_target);
   1676       } else {
   1677         __ j(kNotEqual, true_target);
   1678       }
   1679     }
   1680   } else {
   1681     // Condition has not been materialized, use its inputs as the
   1682     // comparison and its condition as the branch condition.
   1683     HCondition* condition = cond->AsCondition();
   1684 
   1685     // If this is a long or FP comparison that has been folded into
   1686     // the HCondition, generate the comparison directly.
   1687     DataType::Type type = condition->InputAt(0)->GetType();
   1688     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
   1689       GenerateCompareTestAndBranch(condition, true_target, false_target);
   1690       return;
   1691     }
   1692 
   1693     Location lhs = condition->GetLocations()->InAt(0);
   1694     Location rhs = condition->GetLocations()->InAt(1);
   1695     codegen_->GenerateIntCompare(lhs, rhs);
    1696     if (true_target == nullptr) {
   1697       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
   1698     } else {
   1699       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
   1700     }
   1701   }
   1702 
   1703   // If neither branch falls through (case 3), the conditional branch to `true_target`
   1704   // was already emitted (case 2) and we need to emit a jump to `false_target`.
   1705   if (true_target != nullptr && false_target != nullptr) {
   1706     __ jmp(false_target);
   1707   }
   1708 }
   1709 
   1710 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
   1711   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
   1712   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
   1713     locations->SetInAt(0, Location::Any());
   1714   }
   1715 }
   1716 
   1717 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
   1718   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   1719   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
   1720   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
   1721       nullptr : codegen_->GetLabelOf(true_successor);
   1722   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
   1723       nullptr : codegen_->GetLabelOf(false_successor);
   1724   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
   1725 }
   1726 
   1727 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   1728   LocationSummary* locations = new (GetGraph()->GetAllocator())
   1729       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   1730   InvokeRuntimeCallingConvention calling_convention;
   1731   RegisterSet caller_saves = RegisterSet::Empty();
   1732   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   1733   locations->SetCustomSlowPathCallerSaves(caller_saves);
   1734   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
   1735     locations->SetInAt(0, Location::Any());
   1736   }
   1737 }
   1738 
   1739 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   1740   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
   1741   GenerateTestAndBranch<Label>(deoptimize,
   1742                                /* condition_input_index */ 0,
   1743                                slow_path->GetEntryLabel(),
   1744                                /* false_target */ nullptr);
   1745 }
   1746 
   1747 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
   1748   LocationSummary* locations = new (GetGraph()->GetAllocator())
   1749       LocationSummary(flag, LocationSummary::kNoCall);
   1750   locations->SetOut(Location::RequiresRegister());
   1751 }
   1752 
   1753 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
   1754   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
   1755           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
   1756 }
   1757 
   1758 static bool SelectCanUseCMOV(HSelect* select) {
   1759   // There are no conditional move instructions for XMMs.
   1760   if (DataType::IsFloatingPointType(select->GetType())) {
   1761     return false;
   1762   }
   1763 
   1764   // A FP condition doesn't generate the single CC that we need.
   1765   HInstruction* condition = select->GetCondition();
   1766   if (condition->IsCondition() &&
   1767       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
   1768     return false;
   1769   }
   1770 
   1771   // We can generate a CMOV for this Select.
   1772   return true;
   1773 }
   1774 
   1775 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
   1776   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
   1777   if (DataType::IsFloatingPointType(select->GetType())) {
   1778     locations->SetInAt(0, Location::RequiresFpuRegister());
   1779     locations->SetInAt(1, Location::Any());
   1780   } else {
   1781     locations->SetInAt(0, Location::RequiresRegister());
   1782     if (SelectCanUseCMOV(select)) {
   1783       if (select->InputAt(1)->IsConstant()) {
   1784         locations->SetInAt(1, Location::RequiresRegister());
   1785       } else {
   1786         locations->SetInAt(1, Location::Any());
   1787       }
   1788     } else {
   1789       locations->SetInAt(1, Location::Any());
   1790     }
   1791   }
   1792   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
   1793     locations->SetInAt(2, Location::RequiresRegister());
   1794   }
   1795   locations->SetOut(Location::SameAsFirstInput());
   1796 }
   1797 
   1798 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
   1799   LocationSummary* locations = select->GetLocations();
   1800   if (SelectCanUseCMOV(select)) {
   1801     // If both the condition and the source types are integer, we can generate
   1802     // a CMOV to implement Select.
   1803     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
   1804     Location value_true_loc = locations->InAt(1);
   1805     DCHECK(locations->InAt(0).Equals(locations->Out()));
   1806 
   1807     HInstruction* select_condition = select->GetCondition();
   1808     Condition cond = kNotEqual;
   1809 
   1810     // Figure out how to test the 'condition'.
   1811     if (select_condition->IsCondition()) {
   1812       HCondition* condition = select_condition->AsCondition();
   1813       if (!condition->IsEmittedAtUseSite()) {
   1814         // This was a previously materialized condition.
   1815         // Can we use the existing condition code?
   1816         if (AreEflagsSetFrom(condition, select)) {
   1817           // Materialization was the previous instruction.  Condition codes are right.
   1818           cond = X86_64IntegerCondition(condition->GetCondition());
   1819         } else {
   1820           // No, we have to recreate the condition code.
   1821           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
   1822           __ testl(cond_reg, cond_reg);
   1823         }
   1824       } else {
   1825         GenerateCompareTest(condition);
   1826         cond = X86_64IntegerCondition(condition->GetCondition());
   1827       }
   1828     } else {
   1829       // Must be a Boolean condition, which needs to be compared to 0.
   1830       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
   1831       __ testl(cond_reg, cond_reg);
   1832     }
   1833 
   1834     // If the condition is true, overwrite the output, which already contains false.
   1835     // Generate the correct sized CMOV.
   1836     bool is_64_bit = DataType::Is64BitType(select->GetType());
   1837     if (value_true_loc.IsRegister()) {
   1838       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
   1839     } else {
   1840       __ cmov(cond,
   1841               value_false,
   1842               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
   1843     }
   1844   } else {
   1845     NearLabel false_target;
   1846     GenerateTestAndBranch<NearLabel>(select,
   1847                                      /* condition_input_index */ 2,
   1848                                      /* true_target */ nullptr,
   1849                                      &false_target);
   1850     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
   1851     __ Bind(&false_target);
   1852   }
   1853 }
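
// A hypothetical sketch (not ART code) of the CMOV-based Select emitted above: the
// output register already holds the false value (Location::SameAsFirstInput()), and
// the true value is conditionally moved over it, so no branch is needed.
inline int64_t SketchCmovSelect(bool condition, int64_t value_true, int64_t value_false) {
  int64_t out = value_false;  // out == InAt(0), i.e. the false value, on entry.
  if (condition) {            // cmov only overwrites when the condition code holds.
    out = value_true;
  }
  return out;
}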
   1854 
   1855 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   1856   new (GetGraph()->GetAllocator()) LocationSummary(info);
   1857 }
   1858 
   1859 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
   1860   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
   1861 }
   1862 
   1863 void CodeGeneratorX86_64::GenerateNop() {
   1864   __ nop();
   1865 }
   1866 
   1867 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   1868   LocationSummary* locations =
   1869       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
   1870   // Handle the long/FP comparisons made in instruction simplification.
   1871   switch (cond->InputAt(0)->GetType()) {
   1872     case DataType::Type::kInt64:
   1873       locations->SetInAt(0, Location::RequiresRegister());
   1874       locations->SetInAt(1, Location::Any());
   1875       break;
   1876     case DataType::Type::kFloat32:
   1877     case DataType::Type::kFloat64:
   1878       locations->SetInAt(0, Location::RequiresFpuRegister());
   1879       locations->SetInAt(1, Location::Any());
   1880       break;
   1881     default:
   1882       locations->SetInAt(0, Location::RequiresRegister());
   1883       locations->SetInAt(1, Location::Any());
   1884       break;
   1885   }
   1886   if (!cond->IsEmittedAtUseSite()) {
   1887     locations->SetOut(Location::RequiresRegister());
   1888   }
   1889 }
   1890 
   1891 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
   1892   if (cond->IsEmittedAtUseSite()) {
   1893     return;
   1894   }
   1895 
   1896   LocationSummary* locations = cond->GetLocations();
   1897   Location lhs = locations->InAt(0);
   1898   Location rhs = locations->InAt(1);
   1899   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
   1900   NearLabel true_label, false_label;
   1901 
   1902   switch (cond->InputAt(0)->GetType()) {
   1903     default:
   1904       // Integer case.
   1905 
   1906       // Clear output register: setcc only sets the low byte.
   1907       __ xorl(reg, reg);
   1908 
   1909       codegen_->GenerateIntCompare(lhs, rhs);
   1910       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
   1911       return;
   1912     case DataType::Type::kInt64:
   1913       // Clear output register: setcc only sets the low byte.
   1914       __ xorl(reg, reg);
   1915 
   1916       codegen_->GenerateLongCompare(lhs, rhs);
   1917       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
   1918       return;
   1919     case DataType::Type::kFloat32: {
   1920       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
   1921       if (rhs.IsConstant()) {
   1922         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
   1923         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
   1924       } else if (rhs.IsStackSlot()) {
   1925         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1926       } else {
   1927         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
   1928       }
   1929       GenerateFPJumps(cond, &true_label, &false_label);
   1930       break;
   1931     }
   1932     case DataType::Type::kFloat64: {
   1933       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
   1934       if (rhs.IsConstant()) {
   1935         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
   1936         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
   1937       } else if (rhs.IsDoubleStackSlot()) {
   1938         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1939       } else {
   1940         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
   1941       }
   1942       GenerateFPJumps(cond, &true_label, &false_label);
   1943       break;
   1944     }
   1945   }
   1946 
   1947   // Convert the jumps into the result.
   1948   NearLabel done_label;
   1949 
   1950   // False case: result = 0.
   1951   __ Bind(&false_label);
   1952   __ xorl(reg, reg);
   1953   __ jmp(&done_label);
   1954 
   1955   // True case: result = 1.
   1956   __ Bind(&true_label);
   1957   __ movl(reg, Immediate(1));
   1958   __ Bind(&done_label);
   1959 }
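
// A hypothetical sketch (not ART code) of the integer materialization above: setcc
// writes only the low 8 bits of the output register, so the register is cleared
// first; the clearing xorl is emitted before the compare because xorl itself
// clobbers EFLAGS.
inline uint32_t SketchMaterializeCondition(bool condition_holds) {
  uint32_t reg = 0;                  // xorl reg, reg (must precede the compare).
  reg |= condition_holds ? 1u : 0u;  // setcc writes just the low byte.
  return reg;
}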
   1960 
   1961 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
   1962   HandleCondition(comp);
   1963 }
   1964 
   1965 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
   1966   HandleCondition(comp);
   1967 }
   1968 
   1969 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
   1970   HandleCondition(comp);
   1971 }
   1972 
   1973 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
   1974   HandleCondition(comp);
   1975 }
   1976 
   1977 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
   1978   HandleCondition(comp);
   1979 }
   1980 
   1981 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
   1982   HandleCondition(comp);
   1983 }
   1984 
   1985 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
   1986   HandleCondition(comp);
   1987 }
   1988 
   1989 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
   1990   HandleCondition(comp);
   1991 }
   1992 
   1993 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
   1994   HandleCondition(comp);
   1995 }
   1996 
   1997 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
   1998   HandleCondition(comp);
   1999 }
   2000 
   2001 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
   2002   HandleCondition(comp);
   2003 }
   2004 
   2005 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
   2006   HandleCondition(comp);
   2007 }
   2008 
   2009 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
   2010   HandleCondition(comp);
   2011 }
   2012 
   2013 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
   2014   HandleCondition(comp);
   2015 }
   2016 
   2017 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
   2018   HandleCondition(comp);
   2019 }
   2020 
   2021 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
   2022   HandleCondition(comp);
   2023 }
   2024 
   2025 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
   2026   HandleCondition(comp);
   2027 }
   2028 
   2029 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
   2030   HandleCondition(comp);
   2031 }
   2032 
   2033 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
   2034   HandleCondition(comp);
   2035 }
   2036 
   2037 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
   2038   HandleCondition(comp);
   2039 }
   2040 
   2041 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   2042   LocationSummary* locations =
   2043       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
   2044   switch (compare->InputAt(0)->GetType()) {
   2045     case DataType::Type::kBool:
   2046     case DataType::Type::kUint8:
   2047     case DataType::Type::kInt8:
   2048     case DataType::Type::kUint16:
   2049     case DataType::Type::kInt16:
   2050     case DataType::Type::kInt32:
   2051     case DataType::Type::kInt64: {
   2052       locations->SetInAt(0, Location::RequiresRegister());
   2053       locations->SetInAt(1, Location::Any());
   2054       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2055       break;
   2056     }
   2057     case DataType::Type::kFloat32:
   2058     case DataType::Type::kFloat64: {
   2059       locations->SetInAt(0, Location::RequiresFpuRegister());
   2060       locations->SetInAt(1, Location::Any());
   2061       locations->SetOut(Location::RequiresRegister());
   2062       break;
   2063     }
   2064     default:
   2065       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
   2066   }
   2067 }
   2068 
   2069 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
   2070   LocationSummary* locations = compare->GetLocations();
   2071   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   2072   Location left = locations->InAt(0);
   2073   Location right = locations->InAt(1);
   2074 
   2075   NearLabel less, greater, done;
   2076   DataType::Type type = compare->InputAt(0)->GetType();
   2077   Condition less_cond = kLess;
   2078 
   2079   switch (type) {
   2080     case DataType::Type::kBool:
   2081     case DataType::Type::kUint8:
   2082     case DataType::Type::kInt8:
   2083     case DataType::Type::kUint16:
   2084     case DataType::Type::kInt16:
   2085     case DataType::Type::kInt32: {
   2086       codegen_->GenerateIntCompare(left, right);
   2087       break;
   2088     }
   2089     case DataType::Type::kInt64: {
   2090       codegen_->GenerateLongCompare(left, right);
   2091       break;
   2092     }
   2093     case DataType::Type::kFloat32: {
   2094       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
   2095       if (right.IsConstant()) {
   2096         float value = right.GetConstant()->AsFloatConstant()->GetValue();
   2097         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
   2098       } else if (right.IsStackSlot()) {
   2099         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   2100       } else {
   2101         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
   2102       }
   2103       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
   2104       less_cond = kBelow;  //  ucomis{s,d} sets CF
   2105       break;
   2106     }
   2107     case DataType::Type::kFloat64: {
   2108       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
   2109       if (right.IsConstant()) {
   2110         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
   2111         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
   2112       } else if (right.IsDoubleStackSlot()) {
   2113         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   2114       } else {
   2115         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
   2116       }
   2117       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
   2118       less_cond = kBelow;  //  ucomis{s,d} sets CF
   2119       break;
   2120     }
   2121     default:
   2122       LOG(FATAL) << "Unexpected compare type " << type;
   2123   }
   2124 
   2125   __ movl(out, Immediate(0));
   2126   __ j(kEqual, &done);
   2127   __ j(less_cond, &less);
   2128 
   2129   __ Bind(&greater);
   2130   __ movl(out, Immediate(1));
   2131   __ jmp(&done);
   2132 
   2133   __ Bind(&less);
   2134   __ movl(out, Immediate(-1));
   2135 
   2136   __ Bind(&done);
   2137 }
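
// A hypothetical sketch (not ART code) of the value computed by the sequence above
// for floating-point inputs: 0 for equal, 1 for greater, -1 for less, with an
// unordered (NaN) comparison resolved to +1 or -1 by the gt/lt bias of the HCompare.
inline int32_t SketchFpCompare(double left, double right, bool is_gt_bias) {
  if (left != left || right != right) {  // ucomisd reports "unordered" for NaN inputs.
    return is_gt_bias ? 1 : -1;          // j(kUnordered, IsGtBias() ? &greater : &less).
  }
  if (left == right) {
    return 0;
  }
  return (left < right) ? -1 : 1;        // kBelow is used because ucomis{s,d} sets CF.
}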
   2138 
   2139 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
   2140   LocationSummary* locations =
   2141       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
   2142   locations->SetOut(Location::ConstantLocation(constant));
   2143 }
   2144 
   2145 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   2146   // Will be generated at use site.
   2147 }
   2148 
   2149 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
   2150   LocationSummary* locations =
   2151       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
   2152   locations->SetOut(Location::ConstantLocation(constant));
   2153 }
   2154 
   2155 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   2156   // Will be generated at use site.
   2157 }
   2158 
   2159 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
   2160   LocationSummary* locations =
   2161       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
   2162   locations->SetOut(Location::ConstantLocation(constant));
   2163 }
   2164 
   2165 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
   2166   // Will be generated at use site.
   2167 }
   2168 
   2169 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
   2170   LocationSummary* locations =
   2171       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
   2172   locations->SetOut(Location::ConstantLocation(constant));
   2173 }
   2174 
   2175 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   2176   // Will be generated at use site.
   2177 }
   2178 
   2179 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
   2180   LocationSummary* locations =
   2181       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
   2182   locations->SetOut(Location::ConstantLocation(constant));
   2183 }
   2184 
   2185 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
   2186     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   2187   // Will be generated at use site.
   2188 }
   2189 
   2190 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
   2191   constructor_fence->SetLocations(nullptr);
   2192 }
   2193 
   2194 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
   2195     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
   2196   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
   2197 }
   2198 
   2199 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   2200   memory_barrier->SetLocations(nullptr);
   2201 }
   2202 
   2203 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   2204   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
   2205 }
   2206 
   2207 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
   2208   ret->SetLocations(nullptr);
   2209 }
   2210 
   2211 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
   2212   codegen_->GenerateFrameExit();
   2213 }
   2214 
   2215 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
   2216   LocationSummary* locations =
   2217       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
   2218   switch (ret->InputAt(0)->GetType()) {
   2219     case DataType::Type::kReference:
   2220     case DataType::Type::kBool:
   2221     case DataType::Type::kUint8:
   2222     case DataType::Type::kInt8:
   2223     case DataType::Type::kUint16:
   2224     case DataType::Type::kInt16:
   2225     case DataType::Type::kInt32:
   2226     case DataType::Type::kInt64:
   2227       locations->SetInAt(0, Location::RegisterLocation(RAX));
   2228       break;
   2229 
   2230     case DataType::Type::kFloat32:
   2231     case DataType::Type::kFloat64:
   2232       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
   2233       break;
   2234 
   2235     default:
   2236       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
   2237   }
   2238 }
   2239 
   2240 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
   2241   if (kIsDebugBuild) {
   2242     switch (ret->InputAt(0)->GetType()) {
   2243       case DataType::Type::kReference:
   2244       case DataType::Type::kBool:
   2245       case DataType::Type::kUint8:
   2246       case DataType::Type::kInt8:
   2247       case DataType::Type::kUint16:
   2248       case DataType::Type::kInt16:
   2249       case DataType::Type::kInt32:
   2250       case DataType::Type::kInt64:
   2251         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
   2252         break;
   2253 
   2254       case DataType::Type::kFloat32:
   2255       case DataType::Type::kFloat64:
   2256         DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
   2257                   XMM0);
   2258         break;
   2259 
   2260       default:
   2261         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
   2262     }
   2263   }
   2264   codegen_->GenerateFrameExit();
   2265 }
   2266 
   2267 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
   2268   switch (type) {
   2269     case DataType::Type::kReference:
   2270     case DataType::Type::kBool:
   2271     case DataType::Type::kUint8:
   2272     case DataType::Type::kInt8:
   2273     case DataType::Type::kUint16:
   2274     case DataType::Type::kInt16:
   2275     case DataType::Type::kUint32:
   2276     case DataType::Type::kInt32:
   2277     case DataType::Type::kUint64:
   2278     case DataType::Type::kInt64:
   2279       return Location::RegisterLocation(RAX);
   2280 
   2281     case DataType::Type::kVoid:
   2282       return Location::NoLocation();
   2283 
   2284     case DataType::Type::kFloat64:
   2285     case DataType::Type::kFloat32:
   2286       return Location::FpuRegisterLocation(XMM0);
   2287   }
   2288 
   2289   UNREACHABLE();
   2290 }
   2291 
   2292 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
   2293   return Location::RegisterLocation(kMethodRegisterArgument);
   2294 }
   2295 
   2296 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
   2297   switch (type) {
   2298     case DataType::Type::kReference:
   2299     case DataType::Type::kBool:
   2300     case DataType::Type::kUint8:
   2301     case DataType::Type::kInt8:
   2302     case DataType::Type::kUint16:
   2303     case DataType::Type::kInt16:
   2304     case DataType::Type::kInt32: {
   2305       uint32_t index = gp_index_++;
   2306       stack_index_++;
   2307       if (index < calling_convention.GetNumberOfRegisters()) {
   2308         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
   2309       } else {
   2310         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
   2311       }
   2312     }
   2313 
   2314     case DataType::Type::kInt64: {
   2315       uint32_t index = gp_index_;
   2316       stack_index_ += 2;
   2317       if (index < calling_convention.GetNumberOfRegisters()) {
   2318         gp_index_ += 1;
   2319         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
   2320       } else {
   2321         gp_index_ += 2;
   2322         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
   2323       }
   2324     }
   2325 
   2326     case DataType::Type::kFloat32: {
   2327       uint32_t index = float_index_++;
   2328       stack_index_++;
   2329       if (index < calling_convention.GetNumberOfFpuRegisters()) {
   2330         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
   2331       } else {
   2332         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
   2333       }
   2334     }
   2335 
   2336     case DataType::Type::kFloat64: {
   2337       uint32_t index = float_index_++;
   2338       stack_index_ += 2;
   2339       if (index < calling_convention.GetNumberOfFpuRegisters()) {
   2340         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
   2341       } else {
   2342         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
   2343       }
   2344     }
   2345 
   2346     case DataType::Type::kUint32:
   2347     case DataType::Type::kUint64:
   2348     case DataType::Type::kVoid:
   2349       LOG(FATAL) << "Unexpected parameter type " << type;
   2350       break;
   2351   }
   2352   return Location::NoLocation();
   2353 }
   2354 
   2355 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   2356   // The trampoline uses the same calling convention as dex calling conventions,
   2357   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
   2358   // the method_idx.
   2359   HandleInvoke(invoke);
   2360 }
   2361 
   2362 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   2363   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
   2364 }
   2365 
   2366 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   2367   // Explicit clinit checks triggered by static invokes must have been pruned by
   2368   // art::PrepareForRegisterAllocation.
   2369   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   2370 
   2371   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   2372   if (intrinsic.TryDispatch(invoke)) {
   2373     return;
   2374   }
   2375 
   2376   HandleInvoke(invoke);
   2377 }
   2378 
   2379 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
   2380   if (invoke->GetLocations()->Intrinsified()) {
   2381     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
   2382     intrinsic.Dispatch(invoke);
   2383     return true;
   2384   }
   2385   return false;
   2386 }
   2387 
   2388 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   2389   // Explicit clinit checks triggered by static invokes must have been pruned by
   2390   // art::PrepareForRegisterAllocation.
   2391   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   2392 
   2393   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
   2394     return;
   2395   }
   2396 
   2397   LocationSummary* locations = invoke->GetLocations();
   2398   codegen_->GenerateStaticOrDirectCall(
   2399       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
   2400 }
   2401 
   2402 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
   2403   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
   2404   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
   2405 }
   2406 
   2407 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   2408   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   2409   if (intrinsic.TryDispatch(invoke)) {
   2410     return;
   2411   }
   2412 
   2413   HandleInvoke(invoke);
   2414 }
   2415 
   2416 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   2417   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
   2418     return;
   2419   }
   2420 
   2421   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   2422   DCHECK(!codegen_->IsLeafMethod());
   2423 }
   2424 
   2425 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   2426   HandleInvoke(invoke);
   2427   // Add the hidden argument.
   2428   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
   2429 }
   2430 
   2431 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   2432   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   2433   LocationSummary* locations = invoke->GetLocations();
   2434   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   2435   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
   2436   Location receiver = locations->InAt(0);
   2437   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
   2438 
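           // Interface dispatch: load the receiver's class, then the class's ImTable
           // pointer, then the ArtMethod* stored at this method's IMT slot, and call its
           // quick entrypoint. The hidden argument in RAX (the interface method's dex
           // method index) is consumed by the IMT conflict resolution trampoline whenever
           // several interface methods share the same IMT slot.
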
    2439   // Set the hidden argument. It is safe to do so here, as RAX
    2440   // won't be modified thereafter, before the `call` instruction.
   2441   DCHECK_EQ(RAX, hidden_reg.AsRegister());
   2442   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
   2443 
   2444   if (receiver.IsStackSlot()) {
   2445     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
   2446     // /* HeapReference<Class> */ temp = temp->klass_
   2447     __ movl(temp, Address(temp, class_offset));
   2448   } else {
   2449     // /* HeapReference<Class> */ temp = receiver->klass_
   2450     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   2451   }
   2452   codegen_->MaybeRecordImplicitNullCheck(invoke);
    2453   // Instead of simply (possibly) unpoisoning `temp` here, we should
    2454   // emit a read barrier for the previous class reference load.
    2455   // However, this is not required in practice, as this is an
    2456   // intermediate/temporary reference and because the current
    2457   // concurrent copying collector keeps the from-space memory
    2458   // intact/accessible until the end of the marking phase (though
    2459   // the concurrent copying collector may not do so in the future).
   2460   __ MaybeUnpoisonHeapReference(temp);
   2461   // temp = temp->GetAddressOfIMT()
   2462   __ movq(temp,
   2463       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
    2464   // Compute the offset of this interface method's IMT entry.
   2465   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   2466       invoke->GetImtIndex(), kX86_64PointerSize));
   2467   // temp = temp->GetImtEntryAt(method_offset);
   2468   __ movq(temp, Address(temp, method_offset));
   2469   // call temp->GetEntryPoint();
   2470   __ call(Address(
   2471       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
   2472 
   2473   DCHECK(!codegen_->IsLeafMethod());
   2474   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   2475 }
   2476 
   2477 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   2478   HandleInvoke(invoke);
   2479 }
   2480 
   2481 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   2482   codegen_->GenerateInvokePolymorphicCall(invoke);
   2483 }
   2484 
   2485 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
   2486   LocationSummary* locations =
   2487       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
   2488   switch (neg->GetResultType()) {
   2489     case DataType::Type::kInt32:
   2490     case DataType::Type::kInt64:
   2491       locations->SetInAt(0, Location::RequiresRegister());
   2492       locations->SetOut(Location::SameAsFirstInput());
   2493       break;
   2494 
   2495     case DataType::Type::kFloat32:
   2496     case DataType::Type::kFloat64:
   2497       locations->SetInAt(0, Location::RequiresFpuRegister());
   2498       locations->SetOut(Location::SameAsFirstInput());
   2499       locations->AddTemp(Location::RequiresFpuRegister());
   2500       break;
   2501 
   2502     default:
   2503       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
   2504   }
   2505 }
   2506 
   2507 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
   2508   LocationSummary* locations = neg->GetLocations();
   2509   Location out = locations->Out();
   2510   Location in = locations->InAt(0);
   2511   switch (neg->GetResultType()) {
   2512     case DataType::Type::kInt32:
   2513       DCHECK(in.IsRegister());
   2514       DCHECK(in.Equals(out));
   2515       __ negl(out.AsRegister<CpuRegister>());
   2516       break;
   2517 
   2518     case DataType::Type::kInt64:
   2519       DCHECK(in.IsRegister());
   2520       DCHECK(in.Equals(out));
   2521       __ negq(out.AsRegister<CpuRegister>());
   2522       break;
   2523 
   2524     case DataType::Type::kFloat32: {
   2525       DCHECK(in.Equals(out));
   2526       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2527       // Implement float negation with an exclusive or with value
   2528       // 0x80000000 (mask for bit 31, representing the sign of a
   2529       // single-precision floating-point number).
   2530       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
   2531       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
   2532       break;
   2533     }
   2534 
   2535     case DataType::Type::kFloat64: {
   2536       DCHECK(in.Equals(out));
   2537       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2538       // Implement double negation with an exclusive or with value
   2539       // 0x8000000000000000 (mask for bit 63, representing the sign of
   2540       // a double-precision floating-point number).
   2541       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
   2542       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
   2543       break;
   2544     }
   2545 
   2546     default:
   2547       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
   2548   }
   2549 }
   2550 
   2551 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
   2552   LocationSummary* locations =
   2553       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
   2554   DataType::Type result_type = conversion->GetResultType();
   2555   DataType::Type input_type = conversion->GetInputType();
   2556   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
   2557       << input_type << " -> " << result_type;
   2558 
   2559   switch (result_type) {
   2560     case DataType::Type::kUint8:
   2561     case DataType::Type::kInt8:
   2562     case DataType::Type::kUint16:
   2563     case DataType::Type::kInt16:
   2564       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
   2565       locations->SetInAt(0, Location::Any());
   2566       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2567       break;
   2568 
   2569     case DataType::Type::kInt32:
   2570       switch (input_type) {
   2571         case DataType::Type::kInt64:
   2572           locations->SetInAt(0, Location::Any());
   2573           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2574           break;
   2575 
   2576         case DataType::Type::kFloat32:
   2577           locations->SetInAt(0, Location::RequiresFpuRegister());
   2578           locations->SetOut(Location::RequiresRegister());
   2579           break;
   2580 
   2581         case DataType::Type::kFloat64:
   2582           locations->SetInAt(0, Location::RequiresFpuRegister());
   2583           locations->SetOut(Location::RequiresRegister());
   2584           break;
   2585 
   2586         default:
   2587           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2588                      << " to " << result_type;
   2589       }
   2590       break;
   2591 
   2592     case DataType::Type::kInt64:
   2593       switch (input_type) {
   2594         case DataType::Type::kBool:
   2595         case DataType::Type::kUint8:
   2596         case DataType::Type::kInt8:
   2597         case DataType::Type::kUint16:
   2598         case DataType::Type::kInt16:
   2599         case DataType::Type::kInt32:
   2600           // TODO: We would benefit from a (to-be-implemented)
   2601           // Location::RegisterOrStackSlot requirement for this input.
   2602           locations->SetInAt(0, Location::RequiresRegister());
   2603           locations->SetOut(Location::RequiresRegister());
   2604           break;
   2605 
   2606         case DataType::Type::kFloat32:
   2607           locations->SetInAt(0, Location::RequiresFpuRegister());
   2608           locations->SetOut(Location::RequiresRegister());
   2609           break;
   2610 
   2611         case DataType::Type::kFloat64:
   2612           locations->SetInAt(0, Location::RequiresFpuRegister());
   2613           locations->SetOut(Location::RequiresRegister());
   2614           break;
   2615 
   2616         default:
   2617           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2618                      << " to " << result_type;
   2619       }
   2620       break;
   2621 
   2622     case DataType::Type::kFloat32:
   2623       switch (input_type) {
   2624         case DataType::Type::kBool:
   2625         case DataType::Type::kUint8:
   2626         case DataType::Type::kInt8:
   2627         case DataType::Type::kUint16:
   2628         case DataType::Type::kInt16:
   2629         case DataType::Type::kInt32:
   2630           locations->SetInAt(0, Location::Any());
   2631           locations->SetOut(Location::RequiresFpuRegister());
   2632           break;
   2633 
   2634         case DataType::Type::kInt64:
   2635           locations->SetInAt(0, Location::Any());
   2636           locations->SetOut(Location::RequiresFpuRegister());
   2637           break;
   2638 
   2639         case DataType::Type::kFloat64:
   2640           locations->SetInAt(0, Location::Any());
   2641           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   2642           break;
   2643 
   2644         default:
   2645           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2646                      << " to " << result_type;
   2647       }
   2648       break;
   2649 
   2650     case DataType::Type::kFloat64:
   2651       switch (input_type) {
   2652         case DataType::Type::kBool:
   2653         case DataType::Type::kUint8:
   2654         case DataType::Type::kInt8:
   2655         case DataType::Type::kUint16:
   2656         case DataType::Type::kInt16:
   2657         case DataType::Type::kInt32:
   2658           locations->SetInAt(0, Location::Any());
   2659           locations->SetOut(Location::RequiresFpuRegister());
   2660           break;
   2661 
   2662         case DataType::Type::kInt64:
   2663           locations->SetInAt(0, Location::Any());
   2664           locations->SetOut(Location::RequiresFpuRegister());
   2665           break;
   2666 
   2667         case DataType::Type::kFloat32:
   2668           locations->SetInAt(0, Location::Any());
   2669           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   2670           break;
   2671 
   2672         default:
   2673           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2674                      << " to " << result_type;
   2675       }
   2676       break;
   2677 
   2678     default:
   2679       LOG(FATAL) << "Unexpected type conversion from " << input_type
   2680                  << " to " << result_type;
   2681   }
   2682 }
   2683 
   2684 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
   2685   LocationSummary* locations = conversion->GetLocations();
   2686   Location out = locations->Out();
   2687   Location in = locations->InAt(0);
   2688   DataType::Type result_type = conversion->GetResultType();
   2689   DataType::Type input_type = conversion->GetInputType();
   2690   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
   2691       << input_type << " -> " << result_type;
   2692   switch (result_type) {
   2693     case DataType::Type::kUint8:
   2694       switch (input_type) {
   2695         case DataType::Type::kInt8:
   2696         case DataType::Type::kUint16:
   2697         case DataType::Type::kInt16:
   2698         case DataType::Type::kInt32:
   2699         case DataType::Type::kInt64:
   2700           if (in.IsRegister()) {
   2701             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2702           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2703             __ movzxb(out.AsRegister<CpuRegister>(),
   2704                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2705           } else {
   2706             __ movl(out.AsRegister<CpuRegister>(),
   2707                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
   2708           }
   2709           break;
   2710 
   2711         default:
   2712           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2713                      << " to " << result_type;
   2714       }
   2715       break;
   2716 
   2717     case DataType::Type::kInt8:
   2718       switch (input_type) {
   2719         case DataType::Type::kUint8:
   2720         case DataType::Type::kUint16:
   2721         case DataType::Type::kInt16:
   2722         case DataType::Type::kInt32:
   2723         case DataType::Type::kInt64:
   2724           if (in.IsRegister()) {
   2725             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2726           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2727             __ movsxb(out.AsRegister<CpuRegister>(),
   2728                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2729           } else {
   2730             __ movl(out.AsRegister<CpuRegister>(),
   2731                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
   2732           }
   2733           break;
   2734 
   2735         default:
   2736           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2737                      << " to " << result_type;
   2738       }
   2739       break;
   2740 
   2741     case DataType::Type::kUint16:
   2742       switch (input_type) {
   2743         case DataType::Type::kInt8:
   2744         case DataType::Type::kInt16:
   2745         case DataType::Type::kInt32:
   2746         case DataType::Type::kInt64:
   2747           if (in.IsRegister()) {
   2748             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2749           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2750             __ movzxw(out.AsRegister<CpuRegister>(),
   2751                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2752           } else {
   2753             __ movl(out.AsRegister<CpuRegister>(),
   2754                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
   2755           }
   2756           break;
   2757 
   2758         default:
   2759           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2760                      << " to " << result_type;
   2761       }
   2762       break;
   2763 
   2764     case DataType::Type::kInt16:
   2765       switch (input_type) {
   2766         case DataType::Type::kUint16:
   2767         case DataType::Type::kInt32:
   2768         case DataType::Type::kInt64:
   2769           if (in.IsRegister()) {
   2770             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2771           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2772             __ movsxw(out.AsRegister<CpuRegister>(),
   2773                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2774           } else {
   2775             __ movl(out.AsRegister<CpuRegister>(),
   2776                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
   2777           }
   2778           break;
   2779 
   2780         default:
   2781           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2782                      << " to " << result_type;
   2783       }
   2784       break;
   2785 
   2786     case DataType::Type::kInt32:
   2787       switch (input_type) {
   2788         case DataType::Type::kInt64:
   2789           if (in.IsRegister()) {
   2790             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2791           } else if (in.IsDoubleStackSlot()) {
   2792             __ movl(out.AsRegister<CpuRegister>(),
   2793                     Address(CpuRegister(RSP), in.GetStackIndex()));
   2794           } else {
   2795             DCHECK(in.IsConstant());
   2796             DCHECK(in.GetConstant()->IsLongConstant());
   2797             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
   2798             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
   2799           }
   2800           break;
   2801 
   2802         case DataType::Type::kFloat32: {
   2803           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2804           CpuRegister output = out.AsRegister<CpuRegister>();
   2805           NearLabel done, nan;
   2806 
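                   // Java narrows float to int with saturation: NaN becomes 0 and
                   // out-of-range values clamp to the int bounds. (float)kPrimIntMax
                   // rounds up to 2^31, so kAboveEqual catches every positive overflow.
                   // cvttss2si already returns 0x80000000 (kPrimIntMin) for negative
                   // overflow, which is the correct saturated value, so only NaN and
                   // positive overflow need explicit handling below.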
   2807           __ movl(output, Immediate(kPrimIntMax));
   2808           // if input >= (float)INT_MAX goto done
   2809           __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
   2810           __ j(kAboveEqual, &done);
   2811           // if input == NaN goto nan
   2812           __ j(kUnordered, &nan);
   2813           // output = float-to-int-truncate(input)
   2814           __ cvttss2si(output, input, false);
   2815           __ jmp(&done);
   2816           __ Bind(&nan);
   2817           //  output = 0
   2818           __ xorl(output, output);
   2819           __ Bind(&done);
   2820           break;
   2821         }
   2822 
   2823         case DataType::Type::kFloat64: {
   2824           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2825           CpuRegister output = out.AsRegister<CpuRegister>();
   2826           NearLabel done, nan;
   2827 
   2828           __ movl(output, Immediate(kPrimIntMax));
   2829           // if input >= (double)INT_MAX goto done
   2830           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
   2831           __ j(kAboveEqual, &done);
   2832           // if input == NaN goto nan
   2833           __ j(kUnordered, &nan);
   2834           // output = double-to-int-truncate(input)
   2835           __ cvttsd2si(output, input);
   2836           __ jmp(&done);
   2837           __ Bind(&nan);
   2838           //  output = 0
   2839           __ xorl(output, output);
   2840           __ Bind(&done);
   2841           break;
   2842         }
   2843 
   2844         default:
   2845           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2846                      << " to " << result_type;
   2847       }
   2848       break;
   2849 
   2850     case DataType::Type::kInt64:
    2851       DCHECK(out.IsRegister());
    2852       switch (input_type) {
   2853         case DataType::Type::kBool:
   2854         case DataType::Type::kUint8:
   2855         case DataType::Type::kInt8:
   2856         case DataType::Type::kUint16:
   2857         case DataType::Type::kInt16:
   2858         case DataType::Type::kInt32:
   2859           DCHECK(in.IsRegister());
   2860           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2861           break;
   2862 
   2863         case DataType::Type::kFloat32: {
   2864           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2865           CpuRegister output = out.AsRegister<CpuRegister>();
   2866           NearLabel done, nan;
   2867 
   2868           codegen_->Load64BitValue(output, kPrimLongMax);
   2869           // if input >= (float)LONG_MAX goto done
   2870           __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
   2871           __ j(kAboveEqual, &done);
   2872           // if input == NaN goto nan
   2873           __ j(kUnordered, &nan);
   2874           // output = float-to-long-truncate(input)
   2875           __ cvttss2si(output, input, true);
   2876           __ jmp(&done);
   2877           __ Bind(&nan);
   2878           //  output = 0
   2879           __ xorl(output, output);
   2880           __ Bind(&done);
   2881           break;
   2882         }
   2883 
   2884         case DataType::Type::kFloat64: {
   2885           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2886           CpuRegister output = out.AsRegister<CpuRegister>();
   2887           NearLabel done, nan;
   2888 
   2889           codegen_->Load64BitValue(output, kPrimLongMax);
   2890           // if input >= (double)LONG_MAX goto done
   2891           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
   2892           __ j(kAboveEqual, &done);
   2893           // if input == NaN goto nan
   2894           __ j(kUnordered, &nan);
   2895           // output = double-to-long-truncate(input)
   2896           __ cvttsd2si(output, input, true);
   2897           __ jmp(&done);
   2898           __ Bind(&nan);
   2899           //  output = 0
   2900           __ xorl(output, output);
   2901           __ Bind(&done);
   2902           break;
   2903         }
   2904 
   2905         default:
   2906           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2907                      << " to " << result_type;
   2908       }
   2909       break;
   2910 
   2911     case DataType::Type::kFloat32:
   2912       switch (input_type) {
   2913         case DataType::Type::kBool:
   2914         case DataType::Type::kUint8:
   2915         case DataType::Type::kInt8:
   2916         case DataType::Type::kUint16:
   2917         case DataType::Type::kInt16:
   2918         case DataType::Type::kInt32:
   2919           if (in.IsRegister()) {
   2920             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
   2921           } else if (in.IsConstant()) {
   2922             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
   2923             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2924             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2925           } else {
   2926             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
   2927                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
   2928           }
   2929           break;
   2930 
   2931         case DataType::Type::kInt64:
   2932           if (in.IsRegister()) {
   2933             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
   2934           } else if (in.IsConstant()) {
   2935             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
   2936             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2937             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2938           } else {
   2939             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
   2940                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
   2941           }
   2942           break;
   2943 
   2944         case DataType::Type::kFloat64:
   2945           if (in.IsFpuRegister()) {
   2946             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
   2947           } else if (in.IsConstant()) {
   2948             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
   2949             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2950             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2951           } else {
   2952             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
   2953                         Address(CpuRegister(RSP), in.GetStackIndex()));
   2954           }
   2955           break;
   2956 
   2957         default:
   2958           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2959                      << " to " << result_type;
   2960       }
   2961       break;
   2962 
   2963     case DataType::Type::kFloat64:
   2964       switch (input_type) {
   2965         case DataType::Type::kBool:
   2966         case DataType::Type::kUint8:
   2967         case DataType::Type::kInt8:
   2968         case DataType::Type::kUint16:
   2969         case DataType::Type::kInt16:
   2970         case DataType::Type::kInt32:
   2971           if (in.IsRegister()) {
   2972             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
   2973           } else if (in.IsConstant()) {
   2974             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
   2975             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2976             codegen_->Load64BitValue(dest, static_cast<double>(v));
   2977           } else {
   2978             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
   2979                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
   2980           }
   2981           break;
   2982 
   2983         case DataType::Type::kInt64:
   2984           if (in.IsRegister()) {
   2985             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
   2986           } else if (in.IsConstant()) {
   2987             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
   2988             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2989             codegen_->Load64BitValue(dest, static_cast<double>(v));
   2990           } else {
   2991             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
   2992                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
   2993           }
   2994           break;
   2995 
   2996         case DataType::Type::kFloat32:
   2997           if (in.IsFpuRegister()) {
   2998             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
   2999           } else if (in.IsConstant()) {
   3000             float v = in.GetConstant()->AsFloatConstant()->GetValue();
   3001             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   3002             codegen_->Load64BitValue(dest, static_cast<double>(v));
   3003           } else {
   3004             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
   3005                         Address(CpuRegister(RSP), in.GetStackIndex()));
   3006           }
   3007           break;
   3008 
   3009         default:
   3010           LOG(FATAL) << "Unexpected type conversion from " << input_type
   3011                      << " to " << result_type;
   3012       }
   3013       break;
   3014 
   3015     default:
   3016       LOG(FATAL) << "Unexpected type conversion from " << input_type
   3017                  << " to " << result_type;
   3018   }
   3019 }
   3020 
   3021 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
   3022   LocationSummary* locations =
   3023       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
   3024   switch (add->GetResultType()) {
   3025     case DataType::Type::kInt32: {
   3026       locations->SetInAt(0, Location::RequiresRegister());
   3027       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
   3028       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3029       break;
   3030     }
   3031 
   3032     case DataType::Type::kInt64: {
   3033       locations->SetInAt(0, Location::RequiresRegister());
   3034       // We can use a leaq or addq if the constant can fit in an immediate.
   3035       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
   3036       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3037       break;
   3038     }
   3039 
   3040     case DataType::Type::kFloat64:
   3041     case DataType::Type::kFloat32: {
   3042       locations->SetInAt(0, Location::RequiresFpuRegister());
   3043       locations->SetInAt(1, Location::Any());
   3044       locations->SetOut(Location::SameAsFirstInput());
   3045       break;
   3046     }
   3047 
   3048     default:
   3049       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   3050   }
   3051 }
   3052 
   3053 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
   3054   LocationSummary* locations = add->GetLocations();
   3055   Location first = locations->InAt(0);
   3056   Location second = locations->InAt(1);
   3057   Location out = locations->Out();
   3058 
   3059   switch (add->GetResultType()) {
   3060     case DataType::Type::kInt32: {
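               // When the output aliases one of the inputs, a two-operand addl suffices;
               // otherwise leal computes the sum into a third register without
               // clobbering either input.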
   3061       if (second.IsRegister()) {
   3062         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   3063           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3064         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
   3065           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
   3066         } else {
   3067           __ leal(out.AsRegister<CpuRegister>(), Address(
   3068               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
   3069         }
   3070       } else if (second.IsConstant()) {
   3071         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   3072           __ addl(out.AsRegister<CpuRegister>(),
   3073                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
   3074         } else {
   3075           __ leal(out.AsRegister<CpuRegister>(), Address(
   3076               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
   3077         }
   3078       } else {
   3079         DCHECK(first.Equals(locations->Out()));
   3080         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
   3081       }
   3082       break;
   3083     }
   3084 
   3085     case DataType::Type::kInt64: {
   3086       if (second.IsRegister()) {
   3087         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   3088           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3089         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
   3090           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
   3091         } else {
   3092           __ leaq(out.AsRegister<CpuRegister>(), Address(
   3093               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
   3094         }
   3095       } else {
   3096         DCHECK(second.IsConstant());
   3097         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
   3098         int32_t int32_value = Low32Bits(value);
   3099         DCHECK_EQ(int32_value, value);
   3100         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   3101           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
   3102         } else {
   3103           __ leaq(out.AsRegister<CpuRegister>(), Address(
   3104               first.AsRegister<CpuRegister>(), int32_value));
   3105         }
   3106       }
   3107       break;
   3108     }
   3109 
   3110     case DataType::Type::kFloat32: {
   3111       if (second.IsFpuRegister()) {
   3112         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3113       } else if (second.IsConstant()) {
   3114         __ addss(first.AsFpuRegister<XmmRegister>(),
   3115                  codegen_->LiteralFloatAddress(
   3116                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3117       } else {
   3118         DCHECK(second.IsStackSlot());
   3119         __ addss(first.AsFpuRegister<XmmRegister>(),
   3120                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3121       }
   3122       break;
   3123     }
   3124 
   3125     case DataType::Type::kFloat64: {
   3126       if (second.IsFpuRegister()) {
   3127         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3128       } else if (second.IsConstant()) {
   3129         __ addsd(first.AsFpuRegister<XmmRegister>(),
   3130                  codegen_->LiteralDoubleAddress(
   3131                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3132       } else {
   3133         DCHECK(second.IsDoubleStackSlot());
   3134         __ addsd(first.AsFpuRegister<XmmRegister>(),
   3135                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3136       }
   3137       break;
   3138     }
   3139 
   3140     default:
   3141       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   3142   }
   3143 }
   3144 
   3145 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
   3146   LocationSummary* locations =
   3147       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
   3148   switch (sub->GetResultType()) {
   3149     case DataType::Type::kInt32: {
   3150       locations->SetInAt(0, Location::RequiresRegister());
   3151       locations->SetInAt(1, Location::Any());
   3152       locations->SetOut(Location::SameAsFirstInput());
   3153       break;
   3154     }
   3155     case DataType::Type::kInt64: {
   3156       locations->SetInAt(0, Location::RequiresRegister());
   3157       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
   3158       locations->SetOut(Location::SameAsFirstInput());
   3159       break;
   3160     }
   3161     case DataType::Type::kFloat32:
   3162     case DataType::Type::kFloat64: {
   3163       locations->SetInAt(0, Location::RequiresFpuRegister());
   3164       locations->SetInAt(1, Location::Any());
   3165       locations->SetOut(Location::SameAsFirstInput());
   3166       break;
   3167     }
   3168     default:
   3169       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   3170   }
   3171 }
   3172 
   3173 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
   3174   LocationSummary* locations = sub->GetLocations();
   3175   Location first = locations->InAt(0);
   3176   Location second = locations->InAt(1);
   3177   DCHECK(first.Equals(locations->Out()));
   3178   switch (sub->GetResultType()) {
   3179     case DataType::Type::kInt32: {
   3180       if (second.IsRegister()) {
   3181         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3182       } else if (second.IsConstant()) {
   3183         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
   3184         __ subl(first.AsRegister<CpuRegister>(), imm);
   3185       } else {
   3186         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
   3187       }
   3188       break;
   3189     }
   3190     case DataType::Type::kInt64: {
   3191       if (second.IsConstant()) {
   3192         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
   3193         DCHECK(IsInt<32>(value));
   3194         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
   3195       } else {
   3196         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3197       }
   3198       break;
   3199     }
   3200 
   3201     case DataType::Type::kFloat32: {
   3202       if (second.IsFpuRegister()) {
   3203         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3204       } else if (second.IsConstant()) {
   3205         __ subss(first.AsFpuRegister<XmmRegister>(),
   3206                  codegen_->LiteralFloatAddress(
   3207                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3208       } else {
   3209         DCHECK(second.IsStackSlot());
   3210         __ subss(first.AsFpuRegister<XmmRegister>(),
   3211                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3212       }
   3213       break;
   3214     }
   3215 
   3216     case DataType::Type::kFloat64: {
   3217       if (second.IsFpuRegister()) {
   3218         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3219       } else if (second.IsConstant()) {
   3220         __ subsd(first.AsFpuRegister<XmmRegister>(),
   3221                  codegen_->LiteralDoubleAddress(
   3222                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3223       } else {
   3224         DCHECK(second.IsDoubleStackSlot());
   3225         __ subsd(first.AsFpuRegister<XmmRegister>(),
   3226                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3227       }
   3228       break;
   3229     }
   3230 
   3231     default:
   3232       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   3233   }
   3234 }
   3235 
   3236 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
   3237   LocationSummary* locations =
   3238       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
   3239   switch (mul->GetResultType()) {
   3240     case DataType::Type::kInt32: {
   3241       locations->SetInAt(0, Location::RequiresRegister());
   3242       locations->SetInAt(1, Location::Any());
   3243       if (mul->InputAt(1)->IsIntConstant()) {
   3244         // Can use 3 operand multiply.
   3245         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3246       } else {
   3247         locations->SetOut(Location::SameAsFirstInput());
   3248       }
   3249       break;
   3250     }
   3251     case DataType::Type::kInt64: {
   3252       locations->SetInAt(0, Location::RequiresRegister());
   3253       locations->SetInAt(1, Location::Any());
   3254       if (mul->InputAt(1)->IsLongConstant() &&
   3255           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
   3256         // Can use 3 operand multiply.
   3257         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3258       } else {
   3259         locations->SetOut(Location::SameAsFirstInput());
   3260       }
   3261       break;
   3262     }
   3263     case DataType::Type::kFloat32:
   3264     case DataType::Type::kFloat64: {
   3265       locations->SetInAt(0, Location::RequiresFpuRegister());
   3266       locations->SetInAt(1, Location::Any());
   3267       locations->SetOut(Location::SameAsFirstInput());
   3268       break;
   3269     }
   3270 
   3271     default:
   3272       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   3273   }
   3274 }
   3275 
   3276 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
   3277   LocationSummary* locations = mul->GetLocations();
   3278   Location first = locations->InAt(0);
   3279   Location second = locations->InAt(1);
   3280   Location out = locations->Out();
   3281   switch (mul->GetResultType()) {
   3282     case DataType::Type::kInt32:
   3283       // The constant may have ended up in a register, so test explicitly to avoid
   3284       // problems where the output may not be the same as the first operand.
   3285       if (mul->InputAt(1)->IsIntConstant()) {
   3286         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
   3287         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
   3288       } else if (second.IsRegister()) {
   3289         DCHECK(first.Equals(out));
   3290         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3291       } else {
   3292         DCHECK(first.Equals(out));
   3293         DCHECK(second.IsStackSlot());
   3294         __ imull(first.AsRegister<CpuRegister>(),
   3295                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3296       }
   3297       break;
   3298     case DataType::Type::kInt64: {
   3299       // The constant may have ended up in a register, so test explicitly to avoid
   3300       // problems where the output may not be the same as the first operand.
   3301       if (mul->InputAt(1)->IsLongConstant()) {
   3302         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
   3303         if (IsInt<32>(value)) {
   3304           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
   3305                    Immediate(static_cast<int32_t>(value)));
   3306         } else {
   3307           // Have to use the constant area.
   3308           DCHECK(first.Equals(out));
   3309           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
   3310         }
   3311       } else if (second.IsRegister()) {
   3312         DCHECK(first.Equals(out));
   3313         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3314       } else {
   3315         DCHECK(second.IsDoubleStackSlot());
   3316         DCHECK(first.Equals(out));
   3317         __ imulq(first.AsRegister<CpuRegister>(),
   3318                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3319       }
   3320       break;
   3321     }
   3322 
   3323     case DataType::Type::kFloat32: {
   3324       DCHECK(first.Equals(out));
   3325       if (second.IsFpuRegister()) {
   3326         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3327       } else if (second.IsConstant()) {
   3328         __ mulss(first.AsFpuRegister<XmmRegister>(),
   3329                  codegen_->LiteralFloatAddress(
   3330                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3331       } else {
   3332         DCHECK(second.IsStackSlot());
   3333         __ mulss(first.AsFpuRegister<XmmRegister>(),
   3334                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3335       }
   3336       break;
   3337     }
   3338 
   3339     case DataType::Type::kFloat64: {
   3340       DCHECK(first.Equals(out));
   3341       if (second.IsFpuRegister()) {
   3342         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3343       } else if (second.IsConstant()) {
   3344         __ mulsd(first.AsFpuRegister<XmmRegister>(),
   3345                  codegen_->LiteralDoubleAddress(
   3346                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3347       } else {
   3348         DCHECK(second.IsDoubleStackSlot());
   3349         __ mulsd(first.AsFpuRegister<XmmRegister>(),
   3350                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3351       }
   3352       break;
   3353     }
   3354 
   3355     default:
   3356       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   3357   }
   3358 }
   3359 
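         // Loads `source` onto the x87 FP stack. The flds/fldl forms used here only read
         // from memory, so a register or constant source is first spilled to the stack
         // temporary at `temp_offset`. `stack_adjustment` compensates for stack space the
         // caller has already pushed when `source` is an incoming stack slot.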
   3360 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
   3361                                                      uint32_t stack_adjustment, bool is_float) {
   3362   if (source.IsStackSlot()) {
   3363     DCHECK(is_float);
   3364     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
   3365   } else if (source.IsDoubleStackSlot()) {
   3366     DCHECK(!is_float);
   3367     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
   3368   } else {
   3369     // Write the value to the temporary location on the stack and load to FP stack.
   3370     if (is_float) {
   3371       Location stack_temp = Location::StackSlot(temp_offset);
   3372       codegen_->Move(stack_temp, source);
   3373       __ flds(Address(CpuRegister(RSP), temp_offset));
   3374     } else {
   3375       Location stack_temp = Location::DoubleStackSlot(temp_offset);
   3376       codegen_->Move(stack_temp, source);
   3377       __ fldl(Address(CpuRegister(RSP), temp_offset));
   3378     }
   3379   }
   3380 }
   3381 
   3382 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
   3383   DataType::Type type = rem->GetResultType();
   3384   bool is_float = type == DataType::Type::kFloat32;
   3385   size_t elem_size = DataType::Size(type);
   3386   LocationSummary* locations = rem->GetLocations();
   3387   Location first = locations->InAt(0);
   3388   Location second = locations->InAt(1);
   3389   Location out = locations->Out();
   3390 
   3391   // Create stack space for 2 elements.
   3392   // TODO: enhance register allocator to ask for stack temporaries.
   3393   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
   3394 
   3395   // Load the values to the FP stack in reverse order, using temporaries if needed.
   3396   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
   3397   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
   3398 
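           // fprem computes a partial remainder: each iteration reduces the exponent
           // difference by at most 63, so large quotients need several rounds, and the
           // C2 status flag stays set until the reduction is complete. fprem truncates
           // the implied quotient (like fmod), which matches the semantics of the Java
           // % operator on floating-point values.
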
   3399   // Loop doing FPREM until we stabilize.
   3400   NearLabel retry;
   3401   __ Bind(&retry);
   3402   __ fprem();
   3403 
   3404   // Move FP status to AX.
   3405   __ fstsw();
   3406 
   3407   // And see if the argument reduction is complete. This is signaled by the
   3408   // C2 FPU flag bit set to 0.
   3409   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
   3410   __ j(kNotEqual, &retry);
   3411 
   3412   // We have settled on the final value. Retrieve it into an XMM register.
   3413   // Store FP top of stack to real stack.
   3414   if (is_float) {
   3415     __ fsts(Address(CpuRegister(RSP), 0));
   3416   } else {
   3417     __ fstl(Address(CpuRegister(RSP), 0));
   3418   }
   3419 
   3420   // Pop the 2 items from the FP stack.
   3421   __ fucompp();
   3422 
   3423   // Load the value from the stack into an XMM register.
   3424   DCHECK(out.IsFpuRegister()) << out;
   3425   if (is_float) {
   3426     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
   3427   } else {
   3428     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
   3429   }
   3430 
   3431   // And remove the temporary stack space we allocated.
   3432   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
   3433 }
   3434 
   3435 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
   3436   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3437 
   3438   LocationSummary* locations = instruction->GetLocations();
   3439   Location second = locations->InAt(1);
   3440   DCHECK(second.IsConstant());
   3441 
   3442   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
   3443   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
   3444   int64_t imm = Int64FromConstant(second.GetConstant());
   3445 
   3446   DCHECK(imm == 1 || imm == -1);
   3447 
   3448   switch (instruction->GetResultType()) {
   3449     case DataType::Type::kInt32: {
   3450       if (instruction->IsRem()) {
   3451         __ xorl(output_register, output_register);
   3452       } else {
   3453         __ movl(output_register, input_register);
   3454         if (imm == -1) {
   3455           __ negl(output_register);
   3456         }
   3457       }
   3458       break;
   3459     }
   3460 
   3461     case DataType::Type::kInt64: {
   3462       if (instruction->IsRem()) {
   3463         __ xorl(output_register, output_register);
   3464       } else {
   3465         __ movq(output_register, input_register);
   3466         if (imm == -1) {
   3467           __ negq(output_register);
   3468         }
   3469       }
   3470       break;
   3471     }
   3472 
   3473     default:
   3474       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
   3475   }
   3476 }
   3477 
   3478 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   3479   LocationSummary* locations = instruction->GetLocations();
   3480   Location second = locations->InAt(1);
   3481 
   3482   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
   3483   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
   3484 
   3485   int64_t imm = Int64FromConstant(second.GetConstant());
   3486   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
   3487   uint64_t abs_imm = AbsOrMin(imm);
   3488 
   3489   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
   3490 
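           // An arithmetic shift alone rounds toward negative infinity, but Java
           // division rounds toward zero. The lea + cmov sequence below adds
           // (2^shift - 1) to negative numerators only, correcting the rounding before
           // the shift; a final negation handles negative divisors.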
   3491   if (instruction->GetResultType() == DataType::Type::kInt32) {
   3492     __ leal(tmp, Address(numerator, abs_imm - 1));
   3493     __ testl(numerator, numerator);
   3494     __ cmov(kGreaterEqual, tmp, numerator);
   3495     int shift = CTZ(imm);
   3496     __ sarl(tmp, Immediate(shift));
   3497 
   3498     if (imm < 0) {
   3499       __ negl(tmp);
   3500     }
   3501 
   3502     __ movl(output_register, tmp);
   3503   } else {
   3504     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
   3505     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
   3506 
   3507     codegen_->Load64BitValue(rdx, abs_imm - 1);
   3508     __ addq(rdx, numerator);
   3509     __ testq(numerator, numerator);
   3510     __ cmov(kGreaterEqual, rdx, numerator);
   3511     int shift = CTZ(imm);
   3512     __ sarq(rdx, Immediate(shift));
   3513 
   3514     if (imm < 0) {
   3515       __ negq(rdx);
   3516     }
   3517 
   3518     __ movq(output_register, rdx);
   3519   }
   3520 }
   3521 
   3522 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
   3523   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3524 
   3525   LocationSummary* locations = instruction->GetLocations();
   3526   Location second = locations->InAt(1);
   3527 
   3528   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
   3529       : locations->GetTemp(0).AsRegister<CpuRegister>();
   3530   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
   3531   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
   3532       : locations->Out().AsRegister<CpuRegister>();
   3533   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   3534 
   3535   DCHECK_EQ(RAX, eax.AsRegister());
   3536   DCHECK_EQ(RDX, edx.AsRegister());
   3537   if (instruction->IsDiv()) {
   3538     DCHECK_EQ(RAX, out.AsRegister());
   3539   } else {
   3540     DCHECK_EQ(RDX, out.AsRegister());
   3541   }
   3542 
   3543   int64_t magic;
   3544   int shift;
   3545 
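           // Constant division using the "magic number" technique (see Hacker's Delight):
           // the quotient is the high half of magic * numerator, corrected by adding or
           // subtracting the numerator when magic and the divisor have opposite signs,
           // arithmetically shifted, and finally adjusted by the sign bit so the result
           // rounds toward zero. For HRem, the remainder is numerator - quotient * imm.
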
   3546   // TODO: can these branches be written as one?
   3547   if (instruction->GetResultType() == DataType::Type::kInt32) {
   3548     int imm = second.GetConstant()->AsIntConstant()->GetValue();
   3549 
   3550     CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
   3551 
   3552     __ movl(numerator, eax);
   3553 
   3554     __ movl(eax, Immediate(magic));
   3555     __ imull(numerator);
   3556 
   3557     if (imm > 0 && magic < 0) {
   3558       __ addl(edx, numerator);
   3559     } else if (imm < 0 && magic > 0) {
   3560       __ subl(edx, numerator);
   3561     }
   3562 
   3563     if (shift != 0) {
   3564       __ sarl(edx, Immediate(shift));
   3565     }
   3566 
   3567     __ movl(eax, edx);
   3568     __ shrl(edx, Immediate(31));
   3569     __ addl(edx, eax);
   3570 
   3571     if (instruction->IsRem()) {
   3572       __ movl(eax, numerator);
   3573       __ imull(edx, Immediate(imm));
   3574       __ subl(eax, edx);
   3575       __ movl(edx, eax);
   3576     } else {
   3577       __ movl(eax, edx);
   3578     }
   3579   } else {
   3580     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
   3581 
   3582     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
   3583 
   3584     CpuRegister rax = eax;
   3585     CpuRegister rdx = edx;
   3586 
   3587     CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
   3588 
   3589     // Save the numerator.
   3590     __ movq(numerator, rax);
   3591 
   3592     // RAX = magic
   3593     codegen_->Load64BitValue(rax, magic);
   3594 
   3595     // RDX:RAX = magic * numerator
   3596     __ imulq(numerator);
   3597 
   3598     if (imm > 0 && magic < 0) {
   3599       // RDX += numerator
   3600       __ addq(rdx, numerator);
   3601     } else if (imm < 0 && magic > 0) {
   3602       // RDX -= numerator
   3603       __ subq(rdx, numerator);
   3604     }
   3605 
   3606     // Shift if needed.
   3607     if (shift != 0) {
   3608       __ sarq(rdx, Immediate(shift));
   3609     }
   3610 
   3611     // RDX += 1 if RDX < 0
   3612     __ movq(rax, rdx);
   3613     __ shrq(rdx, Immediate(63));
   3614     __ addq(rdx, rax);
   3615 
   3616     if (instruction->IsRem()) {
   3617       __ movq(rax, numerator);
   3618 
   3619       if (IsInt<32>(imm)) {
   3620         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
   3621       } else {
   3622         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
   3623       }
   3624 
   3625       __ subq(rax, rdx);
   3626       __ movq(rdx, rax);
   3627     } else {
   3628       __ movq(rax, rdx);
   3629     }
   3630   }
   3631 }
   3632 
   3633 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   3634   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3635   DataType::Type type = instruction->GetResultType();
   3636   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
   3637 
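           // Constant divisors are strength-reduced: 0 generates nothing (DivZeroCheck
           // makes it unreachable), +/-1 becomes a move or negation, a power-of-two HDiv
           // becomes a shift, and everything else uses the magic-number sequence.
           // Non-constant divisors fall back to idiv, with a slow path guarding the
           // MIN_VALUE / -1 overflow case.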
   3638   bool is_div = instruction->IsDiv();
   3639   LocationSummary* locations = instruction->GetLocations();
   3640 
   3641   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   3642   Location second = locations->InAt(1);
   3643 
   3644   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
   3645   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
   3646 
   3647   if (second.IsConstant()) {
   3648     int64_t imm = Int64FromConstant(second.GetConstant());
   3649 
   3650     if (imm == 0) {
   3651       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
   3652     } else if (imm == 1 || imm == -1) {
   3653       DivRemOneOrMinusOne(instruction);
   3654     } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
   3655       DivByPowerOfTwo(instruction->AsDiv());
   3656     } else {
   3657       DCHECK(imm <= -2 || imm >= 2);
   3658       GenerateDivRemWithAnyConstant(instruction);
   3659     }
   3660   } else {
   3661     SlowPathCode* slow_path =
   3662         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
   3663             instruction, out.AsRegister(), type, is_div);
   3664     codegen_->AddSlowPath(slow_path);
   3665 
   3666     CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3667     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
    3668     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000),
   3669     // so it's safe to just use negl instead of more complex comparisons.
   3670     if (type == DataType::Type::kInt32) {
   3671       __ cmpl(second_reg, Immediate(-1));
   3672       __ j(kEqual, slow_path->GetEntryLabel());
    3673       // edx:eax <- sign-extended eax
   3674       __ cdq();
   3675       // eax = quotient, edx = remainder
   3676       __ idivl(second_reg);
   3677     } else {
   3678       __ cmpq(second_reg, Immediate(-1));
   3679       __ j(kEqual, slow_path->GetEntryLabel());
    3680       // rdx:rax <- sign extension of rax
   3681       __ cqo();
   3682       // rax = quotient, rdx = remainder
   3683       __ idivq(second_reg);
   3684     }
   3685     __ Bind(slow_path->GetExitLabel());
   3686   }
   3687 }
   3688 
   3689 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
   3690   LocationSummary* locations =
   3691       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
   3692   switch (div->GetResultType()) {
   3693     case DataType::Type::kInt32:
   3694     case DataType::Type::kInt64: {
   3695       locations->SetInAt(0, Location::RegisterLocation(RAX));
   3696       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
   3697       locations->SetOut(Location::SameAsFirstInput());
    3698       // Intel uses rdx:rax (edx:eax for 32-bit) as the dividend.
   3699       locations->AddTemp(Location::RegisterLocation(RDX));
    3700       // We need to save the numerator while we tweak RAX and RDX. Since imul forces its
    3701       // results into RAX and RDX, we keep RDX as a temp and request an extra temp to hold
    3702       // the numerator when the divisor is a constant.
   3703       if (div->InputAt(1)->IsConstant()) {
   3704         locations->AddTemp(Location::RequiresRegister());
   3705       }
   3706       break;
   3707     }
   3708 
   3709     case DataType::Type::kFloat32:
   3710     case DataType::Type::kFloat64: {
   3711       locations->SetInAt(0, Location::RequiresFpuRegister());
   3712       locations->SetInAt(1, Location::Any());
   3713       locations->SetOut(Location::SameAsFirstInput());
   3714       break;
   3715     }
   3716 
   3717     default:
   3718       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   3719   }
   3720 }
   3721 
   3722 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
   3723   LocationSummary* locations = div->GetLocations();
   3724   Location first = locations->InAt(0);
   3725   Location second = locations->InAt(1);
   3726   DCHECK(first.Equals(locations->Out()));
   3727 
   3728   DataType::Type type = div->GetResultType();
   3729   switch (type) {
   3730     case DataType::Type::kInt32:
   3731     case DataType::Type::kInt64: {
   3732       GenerateDivRemIntegral(div);
   3733       break;
   3734     }
   3735 
   3736     case DataType::Type::kFloat32: {
   3737       if (second.IsFpuRegister()) {
   3738         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3739       } else if (second.IsConstant()) {
   3740         __ divss(first.AsFpuRegister<XmmRegister>(),
   3741                  codegen_->LiteralFloatAddress(
   3742                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3743       } else {
   3744         DCHECK(second.IsStackSlot());
   3745         __ divss(first.AsFpuRegister<XmmRegister>(),
   3746                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3747       }
   3748       break;
   3749     }
   3750 
   3751     case DataType::Type::kFloat64: {
   3752       if (second.IsFpuRegister()) {
   3753         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3754       } else if (second.IsConstant()) {
   3755         __ divsd(first.AsFpuRegister<XmmRegister>(),
   3756                  codegen_->LiteralDoubleAddress(
   3757                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3758       } else {
   3759         DCHECK(second.IsDoubleStackSlot());
   3760         __ divsd(first.AsFpuRegister<XmmRegister>(),
   3761                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3762       }
   3763       break;
   3764     }
   3765 
   3766     default:
   3767       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   3768   }
   3769 }
   3770 
   3771 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
   3772   DataType::Type type = rem->GetResultType();
   3773   LocationSummary* locations =
   3774     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
   3775 
   3776   switch (type) {
   3777     case DataType::Type::kInt32:
   3778     case DataType::Type::kInt64: {
   3779       locations->SetInAt(0, Location::RegisterLocation(RAX));
   3780       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
    3781       // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
   3782       locations->SetOut(Location::RegisterLocation(RDX));
    3783       // We need to save the numerator while we tweak RAX and RDX. Since imul forces its
    3784       // results into RAX and RDX, we request an extra temp to hold the numerator when the
    3785       // divisor is a constant.
   3786       if (rem->InputAt(1)->IsConstant()) {
   3787         locations->AddTemp(Location::RequiresRegister());
   3788       }
   3789       break;
   3790     }
   3791 
   3792     case DataType::Type::kFloat32:
   3793     case DataType::Type::kFloat64: {
   3794       locations->SetInAt(0, Location::Any());
   3795       locations->SetInAt(1, Location::Any());
   3796       locations->SetOut(Location::RequiresFpuRegister());
   3797       locations->AddTemp(Location::RegisterLocation(RAX));
   3798       break;
   3799     }
   3800 
   3801     default:
   3802       LOG(FATAL) << "Unexpected rem type " << type;
   3803   }
   3804 }
   3805 
   3806 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
   3807   DataType::Type type = rem->GetResultType();
   3808   switch (type) {
   3809     case DataType::Type::kInt32:
   3810     case DataType::Type::kInt64: {
   3811       GenerateDivRemIntegral(rem);
   3812       break;
   3813     }
   3814     case DataType::Type::kFloat32:
   3815     case DataType::Type::kFloat64: {
   3816       GenerateRemFP(rem);
   3817       break;
   3818     }
   3819     default:
   3820       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
   3821   }
   3822 }
   3823 
   3824 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3825   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   3826   locations->SetInAt(0, Location::Any());
   3827 }
   3828 
   3829 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3830   SlowPathCode* slow_path =
   3831       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
   3832   codegen_->AddSlowPath(slow_path);
   3833 
   3834   LocationSummary* locations = instruction->GetLocations();
   3835   Location value = locations->InAt(0);
   3836 
   3837   switch (instruction->GetType()) {
   3838     case DataType::Type::kBool:
   3839     case DataType::Type::kUint8:
   3840     case DataType::Type::kInt8:
   3841     case DataType::Type::kUint16:
   3842     case DataType::Type::kInt16:
   3843     case DataType::Type::kInt32: {
   3844       if (value.IsRegister()) {
   3845         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
   3846         __ j(kEqual, slow_path->GetEntryLabel());
   3847       } else if (value.IsStackSlot()) {
   3848         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
   3849         __ j(kEqual, slow_path->GetEntryLabel());
   3850       } else {
   3851         DCHECK(value.IsConstant()) << value;
   3852         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
   3853           __ jmp(slow_path->GetEntryLabel());
   3854         }
   3855       }
   3856       break;
   3857     }
   3858     case DataType::Type::kInt64: {
   3859       if (value.IsRegister()) {
   3860         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
   3861         __ j(kEqual, slow_path->GetEntryLabel());
   3862       } else if (value.IsDoubleStackSlot()) {
   3863         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
   3864         __ j(kEqual, slow_path->GetEntryLabel());
   3865       } else {
   3866         DCHECK(value.IsConstant()) << value;
   3867         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
   3868           __ jmp(slow_path->GetEntryLabel());
   3869         }
   3870       }
   3871       break;
   3872     }
   3873     default:
   3874       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
   3875   }
   3876 }
   3877 
   3878 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
   3879   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
   3880 
   3881   LocationSummary* locations =
   3882       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
   3883 
   3884   switch (op->GetResultType()) {
   3885     case DataType::Type::kInt32:
   3886     case DataType::Type::kInt64: {
   3887       locations->SetInAt(0, Location::RequiresRegister());
   3888       // The shift count needs to be in CL.
   3889       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
   3890       locations->SetOut(Location::SameAsFirstInput());
   3891       break;
   3892     }
   3893     default:
   3894       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
   3895   }
   3896 }
   3897 
   3898 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
   3899   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
   3900 
   3901   LocationSummary* locations = op->GetLocations();
   3902   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
   3903   Location second = locations->InAt(1);
   3904 
   3905   switch (op->GetResultType()) {
   3906     case DataType::Type::kInt32: {
   3907       if (second.IsRegister()) {
   3908         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3909         if (op->IsShl()) {
   3910           __ shll(first_reg, second_reg);
   3911         } else if (op->IsShr()) {
   3912           __ sarl(first_reg, second_reg);
   3913         } else {
   3914           __ shrl(first_reg, second_reg);
   3915         }
   3916       } else {
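        // Java only uses the low 5 bits of an int shift count (6 bits for long), which matches
        // the hardware's treatment of a count in CL, so the constant distance can simply be masked.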
   3917         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
   3918         if (op->IsShl()) {
   3919           __ shll(first_reg, imm);
   3920         } else if (op->IsShr()) {
   3921           __ sarl(first_reg, imm);
   3922         } else {
   3923           __ shrl(first_reg, imm);
   3924         }
   3925       }
   3926       break;
   3927     }
   3928     case DataType::Type::kInt64: {
   3929       if (second.IsRegister()) {
   3930         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3931         if (op->IsShl()) {
   3932           __ shlq(first_reg, second_reg);
   3933         } else if (op->IsShr()) {
   3934           __ sarq(first_reg, second_reg);
   3935         } else {
   3936           __ shrq(first_reg, second_reg);
   3937         }
   3938       } else {
   3939         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
   3940         if (op->IsShl()) {
   3941           __ shlq(first_reg, imm);
   3942         } else if (op->IsShr()) {
   3943           __ sarq(first_reg, imm);
   3944         } else {
   3945           __ shrq(first_reg, imm);
   3946         }
   3947       }
   3948       break;
   3949     }
   3950     default:
   3951       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
   3952       UNREACHABLE();
   3953   }
   3954 }
   3955 
   3956 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
   3957   LocationSummary* locations =
   3958       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
   3959 
   3960   switch (ror->GetResultType()) {
   3961     case DataType::Type::kInt32:
   3962     case DataType::Type::kInt64: {
   3963       locations->SetInAt(0, Location::RequiresRegister());
   3964       // The shift count needs to be in CL (unless it is a constant).
   3965       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
   3966       locations->SetOut(Location::SameAsFirstInput());
   3967       break;
   3968     }
   3969     default:
   3970       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
   3971       UNREACHABLE();
   3972   }
   3973 }
   3974 
   3975 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
   3976   LocationSummary* locations = ror->GetLocations();
   3977   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
   3978   Location second = locations->InAt(1);
   3979 
   3980   switch (ror->GetResultType()) {
   3981     case DataType::Type::kInt32:
   3982       if (second.IsRegister()) {
   3983         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3984         __ rorl(first_reg, second_reg);
   3985       } else {
   3986         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
   3987         __ rorl(first_reg, imm);
   3988       }
   3989       break;
   3990     case DataType::Type::kInt64:
   3991       if (second.IsRegister()) {
   3992         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3993         __ rorq(first_reg, second_reg);
   3994       } else {
   3995         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
   3996         __ rorq(first_reg, imm);
   3997       }
   3998       break;
   3999     default:
   4000       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
   4001       UNREACHABLE();
   4002   }
   4003 }
   4004 
   4005 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
   4006   HandleShift(shl);
   4007 }
   4008 
   4009 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
   4010   HandleShift(shl);
   4011 }
   4012 
   4013 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
   4014   HandleShift(shr);
   4015 }
   4016 
   4017 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
   4018   HandleShift(shr);
   4019 }
   4020 
   4021 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
   4022   HandleShift(ushr);
   4023 }
   4024 
   4025 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
   4026   HandleShift(ushr);
   4027 }
   4028 
   4029 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   4030   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
   4031       instruction, LocationSummary::kCallOnMainOnly);
   4032   InvokeRuntimeCallingConvention calling_convention;
   4033   if (instruction->IsStringAlloc()) {
   4034     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
   4035   } else {
   4036     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   4037   }
   4038   locations->SetOut(Location::RegisterLocation(RAX));
   4039 }
   4040 
   4041 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
    4042   // Note: if heap poisoning is enabled, the entry point takes care
   4043   // of poisoning the reference.
   4044   if (instruction->IsStringAlloc()) {
   4045     // String is allocated through StringFactory. Call NewEmptyString entry point.
   4046     CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   4047     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
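    // The thread-local pNewEmptyString entrypoint slot holds the StringFactory method (an
    // ArtMethod*); load it gs:-relative (absolute address, no RIP-relative addressing) and
    // then call its quick-compiled code through the usual ArtMethod entry point field.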
   4048     __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
   4049     __ call(Address(temp, code_offset.SizeValue()));
   4050     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
   4051   } else {
   4052     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
   4053     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   4054     DCHECK(!codegen_->IsLeafMethod());
   4055   }
   4056 }
   4057 
   4058 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
   4059   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
   4060       instruction, LocationSummary::kCallOnMainOnly);
   4061   InvokeRuntimeCallingConvention calling_convention;
   4062   locations->SetOut(Location::RegisterLocation(RAX));
   4063   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   4064   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   4065 }
   4066 
   4067 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
    4068   // Note: if heap poisoning is enabled, the entry point takes care
   4069   // of poisoning the reference.
   4070   QuickEntrypointEnum entrypoint =
   4071       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
   4072   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   4073   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
   4074   DCHECK(!codegen_->IsLeafMethod());
   4075 }
   4076 
   4077 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
   4078   LocationSummary* locations =
   4079       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
   4080   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
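  // Stack-passed arguments live in the caller's frame, so rebase their offsets by this
  // method's frame size to address them relative to the current stack pointer.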
   4081   if (location.IsStackSlot()) {
   4082     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   4083   } else if (location.IsDoubleStackSlot()) {
   4084     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   4085   }
   4086   locations->SetOut(location);
   4087 }
   4088 
   4089 void InstructionCodeGeneratorX86_64::VisitParameterValue(
   4090     HParameterValue* instruction ATTRIBUTE_UNUSED) {
   4091   // Nothing to do, the parameter is already at its location.
   4092 }
   4093 
   4094 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
   4095   LocationSummary* locations =
   4096       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
   4097   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
   4098 }
   4099 
   4100 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
   4101     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
   4102   // Nothing to do, the method is already at its location.
   4103 }
   4104 
   4105 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   4106   LocationSummary* locations =
   4107       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
   4108   locations->SetInAt(0, Location::RequiresRegister());
   4109   locations->SetOut(Location::RequiresRegister());
   4110 }
   4111 
   4112 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   4113   LocationSummary* locations = instruction->GetLocations();
   4114   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
   4115     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
   4116         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
   4117     __ movq(locations->Out().AsRegister<CpuRegister>(),
   4118             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
   4119   } else {
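    // IMT case: load the ImTable pointer out of the class, then load the ArtMethod* slot at
    // `method_offset` within that table.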
   4120     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   4121         instruction->GetIndex(), kX86_64PointerSize));
   4122     __ movq(locations->Out().AsRegister<CpuRegister>(),
   4123             Address(locations->InAt(0).AsRegister<CpuRegister>(),
   4124             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
   4125     __ movq(locations->Out().AsRegister<CpuRegister>(),
   4126             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
   4127   }
   4128 }
   4129 
   4130 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
   4131   LocationSummary* locations =
   4132       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
   4133   locations->SetInAt(0, Location::RequiresRegister());
   4134   locations->SetOut(Location::SameAsFirstInput());
   4135 }
   4136 
   4137 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
   4138   LocationSummary* locations = not_->GetLocations();
   4139   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
   4140             locations->Out().AsRegister<CpuRegister>().AsRegister());
   4141   Location out = locations->Out();
   4142   switch (not_->GetResultType()) {
   4143     case DataType::Type::kInt32:
   4144       __ notl(out.AsRegister<CpuRegister>());
   4145       break;
   4146 
   4147     case DataType::Type::kInt64:
   4148       __ notq(out.AsRegister<CpuRegister>());
   4149       break;
   4150 
   4151     default:
   4152       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
   4153   }
   4154 }
   4155 
   4156 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
   4157   LocationSummary* locations =
   4158       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
   4159   locations->SetInAt(0, Location::RequiresRegister());
   4160   locations->SetOut(Location::SameAsFirstInput());
   4161 }
   4162 
   4163 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
   4164   LocationSummary* locations = bool_not->GetLocations();
   4165   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
   4166             locations->Out().AsRegister<CpuRegister>().AsRegister());
   4167   Location out = locations->Out();
   4168   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
   4169 }
   4170 
   4171 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
   4172   LocationSummary* locations =
   4173       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
   4174   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
   4175     locations->SetInAt(i, Location::Any());
   4176   }
   4177   locations->SetOut(Location::Any());
   4178 }
   4179 
   4180 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   4181   LOG(FATAL) << "Unimplemented";
   4182 }
   4183 
   4184 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
   4185   /*
    4186    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
   4187    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
   4188    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   4189    */
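  // x86-64 is TSO: the only reordering the hardware performs is a store followed by a later
  // load to a different address, which is why only the StoreLoad/AnyAny case below needs a
  // real fence.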
   4190   switch (kind) {
   4191     case MemBarrierKind::kAnyAny: {
   4192       MemoryFence();
   4193       break;
   4194     }
   4195     case MemBarrierKind::kAnyStore:
   4196     case MemBarrierKind::kLoadAny:
   4197     case MemBarrierKind::kStoreStore: {
   4198       // nop
   4199       break;
   4200     }
   4201     case MemBarrierKind::kNTStoreStore:
   4202       // Non-Temporal Store/Store needs an explicit fence.
   4203       MemoryFence(/* non-temporal */ true);
   4204       break;
   4205   }
   4206 }
   4207 
   4208 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
   4209   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
   4210 
   4211   bool object_field_get_with_read_barrier =
   4212       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
   4213   LocationSummary* locations =
   4214       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
   4215                                                        object_field_get_with_read_barrier
   4216                                                            ? LocationSummary::kCallOnSlowPath
   4217                                                            : LocationSummary::kNoCall);
   4218   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
   4219     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   4220   }
   4221   locations->SetInAt(0, Location::RequiresRegister());
   4222   if (DataType::IsFloatingPointType(instruction->GetType())) {
   4223     locations->SetOut(Location::RequiresFpuRegister());
   4224   } else {
   4225     // The output overlaps for an object field get when read barriers
   4226     // are enabled: we do not want the move to overwrite the object's
   4227     // location, as we need it to emit the read barrier.
   4228     locations->SetOut(
   4229         Location::RequiresRegister(),
   4230         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   4231   }
   4232 }
   4233 
   4234 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
   4235                                                     const FieldInfo& field_info) {
   4236   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
   4237 
   4238   LocationSummary* locations = instruction->GetLocations();
   4239   Location base_loc = locations->InAt(0);
   4240   CpuRegister base = base_loc.AsRegister<CpuRegister>();
   4241   Location out = locations->Out();
   4242   bool is_volatile = field_info.IsVolatile();
   4243   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
   4244   DataType::Type load_type = instruction->GetType();
   4245   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   4246 
   4247   switch (load_type) {
   4248     case DataType::Type::kBool:
   4249     case DataType::Type::kUint8: {
   4250       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
   4251       break;
   4252     }
   4253 
   4254     case DataType::Type::kInt8: {
   4255       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
   4256       break;
   4257     }
   4258 
   4259     case DataType::Type::kUint16: {
   4260       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
   4261       break;
   4262     }
   4263 
   4264     case DataType::Type::kInt16: {
   4265       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
   4266       break;
   4267     }
   4268 
   4269     case DataType::Type::kInt32: {
   4270       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
   4271       break;
   4272     }
   4273 
   4274     case DataType::Type::kReference: {
   4275       // /* HeapReference<Object> */ out = *(base + offset)
   4276       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   4277         // Note that a potential implicit null check is handled in this
   4278         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
   4279         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   4280             instruction, out, base, offset, /* needs_null_check */ true);
   4281         if (is_volatile) {
   4282           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4283         }
   4284       } else {
   4285         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
   4286         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4287         if (is_volatile) {
   4288           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4289         }
   4290         // If read barriers are enabled, emit read barriers other than
   4291         // Baker's using a slow path (and also unpoison the loaded
   4292         // reference, if heap poisoning is enabled).
   4293         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
   4294       }
   4295       break;
   4296     }
   4297 
   4298     case DataType::Type::kInt64: {
   4299       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
   4300       break;
   4301     }
   4302 
   4303     case DataType::Type::kFloat32: {
   4304       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
   4305       break;
   4306     }
   4307 
   4308     case DataType::Type::kFloat64: {
   4309       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
   4310       break;
   4311     }
   4312 
   4313     case DataType::Type::kUint32:
   4314     case DataType::Type::kUint64:
   4315     case DataType::Type::kVoid:
   4316       LOG(FATAL) << "Unreachable type " << load_type;
   4317       UNREACHABLE();
   4318   }
   4319 
   4320   if (load_type == DataType::Type::kReference) {
   4321     // Potential implicit null checks, in the case of reference
   4322     // fields, are handled in the previous switch statement.
   4323   } else {
   4324     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4325   }
   4326 
   4327   if (is_volatile) {
   4328     if (load_type == DataType::Type::kReference) {
   4329       // Memory barriers, in the case of references, are also handled
   4330       // in the previous switch statement.
   4331     } else {
   4332       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4333     }
   4334   }
   4335 }
   4336 
   4337 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
   4338                                             const FieldInfo& field_info) {
   4339   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
   4340 
   4341   LocationSummary* locations =
   4342       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
   4343   DataType::Type field_type = field_info.GetFieldType();
   4344   bool is_volatile = field_info.IsVolatile();
   4345   bool needs_write_barrier =
   4346       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
   4347 
   4348   locations->SetInAt(0, Location::RequiresRegister());
   4349   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
   4350     if (is_volatile) {
   4351       // In order to satisfy the semantics of volatile, this must be a single instruction store.
   4352       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
   4353     } else {
   4354       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
   4355     }
   4356   } else {
   4357     if (is_volatile) {
   4358       // In order to satisfy the semantics of volatile, this must be a single instruction store.
   4359       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
   4360     } else {
   4361       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4362     }
   4363   }
   4364   if (needs_write_barrier) {
   4365     // Temporary registers for the write barrier.
   4366     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
   4367     locations->AddTemp(Location::RequiresRegister());
   4368   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
   4369     // Temporary register for the reference poisoning.
   4370     locations->AddTemp(Location::RequiresRegister());
   4371   }
   4372 }
   4373 
   4374 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
   4375                                                     const FieldInfo& field_info,
   4376                                                     bool value_can_be_null) {
   4377   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
   4378 
   4379   LocationSummary* locations = instruction->GetLocations();
   4380   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
   4381   Location value = locations->InAt(1);
   4382   bool is_volatile = field_info.IsVolatile();
   4383   DataType::Type field_type = field_info.GetFieldType();
   4384   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   4385 
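  // Volatile stores follow the JSR-133 cookbook protocol: an AnyStore barrier before the
  // store and an AnyAny barrier after it (emitted at the end of this method).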
   4386   if (is_volatile) {
   4387     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   4388   }
   4389 
   4390   bool maybe_record_implicit_null_check_done = false;
   4391 
   4392   switch (field_type) {
   4393     case DataType::Type::kBool:
   4394     case DataType::Type::kUint8:
   4395     case DataType::Type::kInt8: {
   4396       if (value.IsConstant()) {
   4397         __ movb(Address(base, offset),
   4398                 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
   4399       } else {
   4400         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
   4401       }
   4402       break;
   4403     }
   4404 
   4405     case DataType::Type::kUint16:
   4406     case DataType::Type::kInt16: {
   4407       if (value.IsConstant()) {
   4408         __ movw(Address(base, offset),
   4409                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
   4410       } else {
   4411         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
   4412       }
   4413       break;
   4414     }
   4415 
   4416     case DataType::Type::kInt32:
   4417     case DataType::Type::kReference: {
   4418       if (value.IsConstant()) {
   4419         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4420         // `field_type == DataType::Type::kReference` implies `v == 0`.
   4421         DCHECK((field_type != DataType::Type::kReference) || (v == 0));
   4422         // Note: if heap poisoning is enabled, no need to poison
   4423         // (negate) `v` if it is a reference, as it would be null.
   4424         __ movl(Address(base, offset), Immediate(v));
   4425       } else {
   4426         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
   4427           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   4428           __ movl(temp, value.AsRegister<CpuRegister>());
   4429           __ PoisonHeapReference(temp);
   4430           __ movl(Address(base, offset), temp);
   4431         } else {
   4432           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
   4433         }
   4434       }
   4435       break;
   4436     }
   4437 
   4438     case DataType::Type::kInt64: {
   4439       if (value.IsConstant()) {
   4440         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
   4441         codegen_->MoveInt64ToAddress(Address(base, offset),
   4442                                      Address(base, offset + sizeof(int32_t)),
   4443                                      v,
   4444                                      instruction);
   4445         maybe_record_implicit_null_check_done = true;
   4446       } else {
   4447         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
   4448       }
   4449       break;
   4450     }
   4451 
   4452     case DataType::Type::kFloat32: {
   4453       if (value.IsConstant()) {
   4454         int32_t v =
   4455             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
   4456         __ movl(Address(base, offset), Immediate(v));
   4457       } else {
   4458         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
   4459       }
   4460       break;
   4461     }
   4462 
   4463     case DataType::Type::kFloat64: {
   4464       if (value.IsConstant()) {
   4465         int64_t v =
   4466             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
   4467         codegen_->MoveInt64ToAddress(Address(base, offset),
   4468                                      Address(base, offset + sizeof(int32_t)),
   4469                                      v,
   4470                                      instruction);
   4471         maybe_record_implicit_null_check_done = true;
   4472       } else {
   4473         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
   4474       }
   4475       break;
   4476     }
   4477 
   4478     case DataType::Type::kUint32:
   4479     case DataType::Type::kUint64:
   4480     case DataType::Type::kVoid:
   4481       LOG(FATAL) << "Unreachable type " << field_type;
   4482       UNREACHABLE();
   4483   }
   4484 
   4485   if (!maybe_record_implicit_null_check_done) {
   4486     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4487   }
   4488 
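  // Storing a reference into the heap requires a GC write barrier: mark the card covering
  // `base` so the garbage collector rescans this object when it processes dirty cards.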
   4489   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
   4490     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   4491     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
   4492     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
   4493   }
   4494 
   4495   if (is_volatile) {
   4496     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   4497   }
   4498 }
   4499 
   4500 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   4501   HandleFieldSet(instruction, instruction->GetFieldInfo());
   4502 }
   4503 
   4504 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   4505   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   4506 }
   4507 
   4508 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   4509   HandleFieldGet(instruction);
   4510 }
   4511 
   4512 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   4513   HandleFieldGet(instruction, instruction->GetFieldInfo());
   4514 }
   4515 
   4516 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   4517   HandleFieldGet(instruction);
   4518 }
   4519 
   4520 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   4521   HandleFieldGet(instruction, instruction->GetFieldInfo());
   4522 }
   4523 
   4524 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   4525   HandleFieldSet(instruction, instruction->GetFieldInfo());
   4526 }
   4527 
   4528 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   4529   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   4530 }
   4531 
   4532 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
   4533     HUnresolvedInstanceFieldGet* instruction) {
   4534   FieldAccessCallingConventionX86_64 calling_convention;
   4535   codegen_->CreateUnresolvedFieldLocationSummary(
   4536       instruction, instruction->GetFieldType(), calling_convention);
   4537 }
   4538 
   4539 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
   4540     HUnresolvedInstanceFieldGet* instruction) {
   4541   FieldAccessCallingConventionX86_64 calling_convention;
   4542   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4543                                           instruction->GetFieldType(),
   4544                                           instruction->GetFieldIndex(),
   4545                                           instruction->GetDexPc(),
   4546                                           calling_convention);
   4547 }
   4548 
   4549 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
   4550     HUnresolvedInstanceFieldSet* instruction) {
   4551   FieldAccessCallingConventionX86_64 calling_convention;
   4552   codegen_->CreateUnresolvedFieldLocationSummary(
   4553       instruction, instruction->GetFieldType(), calling_convention);
   4554 }
   4555 
   4556 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
   4557     HUnresolvedInstanceFieldSet* instruction) {
   4558   FieldAccessCallingConventionX86_64 calling_convention;
   4559   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4560                                           instruction->GetFieldType(),
   4561                                           instruction->GetFieldIndex(),
   4562                                           instruction->GetDexPc(),
   4563                                           calling_convention);
   4564 }
   4565 
   4566 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
   4567     HUnresolvedStaticFieldGet* instruction) {
   4568   FieldAccessCallingConventionX86_64 calling_convention;
   4569   codegen_->CreateUnresolvedFieldLocationSummary(
   4570       instruction, instruction->GetFieldType(), calling_convention);
   4571 }
   4572 
   4573 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
   4574     HUnresolvedStaticFieldGet* instruction) {
   4575   FieldAccessCallingConventionX86_64 calling_convention;
   4576   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4577                                           instruction->GetFieldType(),
   4578                                           instruction->GetFieldIndex(),
   4579                                           instruction->GetDexPc(),
   4580                                           calling_convention);
   4581 }
   4582 
   4583 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
   4584     HUnresolvedStaticFieldSet* instruction) {
   4585   FieldAccessCallingConventionX86_64 calling_convention;
   4586   codegen_->CreateUnresolvedFieldLocationSummary(
   4587       instruction, instruction->GetFieldType(), calling_convention);
   4588 }
   4589 
   4590 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
   4591     HUnresolvedStaticFieldSet* instruction) {
   4592   FieldAccessCallingConventionX86_64 calling_convention;
   4593   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4594                                           instruction->GetFieldType(),
   4595                                           instruction->GetFieldIndex(),
   4596                                           instruction->GetDexPc(),
   4597                                           calling_convention);
   4598 }
   4599 
   4600 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
   4601   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   4602   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
   4603       ? Location::RequiresRegister()
   4604       : Location::Any();
   4605   locations->SetInAt(0, loc);
   4606 }
   4607 
   4608 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
   4609   if (CanMoveNullCheckToUser(instruction)) {
   4610     return;
   4611   }
   4612   LocationSummary* locations = instruction->GetLocations();
   4613   Location obj = locations->InAt(0);
   4614 
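  // A load from offset 0 of a null object faults; the fault handler uses the PC recorded
  // below to throw the NullPointerException. RAX is an arbitrary register operand here;
  // only the memory access matters.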
   4615   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
   4616   RecordPcInfo(instruction, instruction->GetDexPc());
   4617 }
   4618 
   4619 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
   4620   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
   4621   AddSlowPath(slow_path);
   4622 
   4623   LocationSummary* locations = instruction->GetLocations();
   4624   Location obj = locations->InAt(0);
   4625 
   4626   if (obj.IsRegister()) {
   4627     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
   4628   } else if (obj.IsStackSlot()) {
   4629     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
   4630   } else {
   4631     DCHECK(obj.IsConstant()) << obj;
   4632     DCHECK(obj.GetConstant()->IsNullConstant());
   4633     __ jmp(slow_path->GetEntryLabel());
   4634     return;
   4635   }
   4636   __ j(kEqual, slow_path->GetEntryLabel());
   4637 }
   4638 
   4639 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
   4640   codegen_->GenerateNullCheck(instruction);
   4641 }
   4642 
   4643 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
   4644   bool object_array_get_with_read_barrier =
   4645       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
   4646   LocationSummary* locations =
   4647       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
   4648                                                        object_array_get_with_read_barrier
   4649                                                            ? LocationSummary::kCallOnSlowPath
   4650                                                            : LocationSummary::kNoCall);
   4651   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
   4652     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   4653   }
   4654   locations->SetInAt(0, Location::RequiresRegister());
   4655   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4656   if (DataType::IsFloatingPointType(instruction->GetType())) {
   4657     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   4658   } else {
   4659     // The output overlaps for an object array get when read barriers
   4660     // are enabled: we do not want the move to overwrite the array's
   4661     // location, as we need it to emit the read barrier.
   4662     locations->SetOut(
   4663         Location::RequiresRegister(),
   4664         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   4665   }
   4666 }
   4667 
   4668 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
   4669   LocationSummary* locations = instruction->GetLocations();
   4670   Location obj_loc = locations->InAt(0);
   4671   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   4672   Location index = locations->InAt(1);
   4673   Location out_loc = locations->Out();
   4674   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   4675 
   4676   DataType::Type type = instruction->GetType();
   4677   switch (type) {
   4678     case DataType::Type::kBool:
   4679     case DataType::Type::kUint8: {
   4680       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4681       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
   4682       break;
   4683     }
   4684 
   4685     case DataType::Type::kInt8: {
   4686       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4687       __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
   4688       break;
   4689     }
   4690 
   4691     case DataType::Type::kUint16: {
   4692       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4693       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
    4694         // Branch into the compressed and uncompressed cases based on the string's compression flag.
   4695         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
   4696         NearLabel done, not_compressed;
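        // Bit 0 of the String count field is the compression flag (0 = compressed, 8-bit
        // chars); the character count itself lives in the remaining bits.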
   4697         __ testb(Address(obj, count_offset), Immediate(1));
   4698         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4699         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
   4700                       "Expecting 0=compressed, 1=uncompressed");
   4701         __ j(kNotZero, &not_compressed);
   4702         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
   4703         __ jmp(&done);
   4704         __ Bind(&not_compressed);
   4705         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
   4706         __ Bind(&done);
   4707       } else {
   4708         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
   4709       }
   4710       break;
   4711     }
   4712 
   4713     case DataType::Type::kInt16: {
   4714       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4715       __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
   4716       break;
   4717     }
   4718 
   4719     case DataType::Type::kInt32: {
   4720       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4721       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
   4722       break;
   4723     }
   4724 
   4725     case DataType::Type::kReference: {
   4726       static_assert(
   4727           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   4728           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   4729       // /* HeapReference<Object> */ out =
   4730       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   4731       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   4732         // Note that a potential implicit null check is handled in this
   4733         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
   4734         codegen_->GenerateArrayLoadWithBakerReadBarrier(
   4735             instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
   4736       } else {
   4737         CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4738         __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
   4739         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4740         // If read barriers are enabled, emit read barriers other than
   4741         // Baker's using a slow path (and also unpoison the loaded
   4742         // reference, if heap poisoning is enabled).
   4743         if (index.IsConstant()) {
   4744           uint32_t offset =
   4745               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
   4746           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
   4747         } else {
   4748           codegen_->MaybeGenerateReadBarrierSlow(
   4749               instruction, out_loc, out_loc, obj_loc, data_offset, index);
   4750         }
   4751       }
   4752       break;
   4753     }
   4754 
   4755     case DataType::Type::kInt64: {
   4756       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4757       __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
   4758       break;
   4759     }
   4760 
   4761     case DataType::Type::kFloat32: {
   4762       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
   4763       __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
   4764       break;
   4765     }
   4766 
   4767     case DataType::Type::kFloat64: {
   4768       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
   4769       __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
   4770       break;
   4771     }
   4772 
   4773     case DataType::Type::kUint32:
   4774     case DataType::Type::kUint64:
   4775     case DataType::Type::kVoid:
   4776       LOG(FATAL) << "Unreachable type " << type;
   4777       UNREACHABLE();
   4778   }
   4779 
   4780   if (type == DataType::Type::kReference) {
   4781     // Potential implicit null checks, in the case of reference
   4782     // arrays, are handled in the previous switch statement.
   4783   } else {
   4784     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4785   }
   4786 }
   4787 
   4788 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
   4789   DataType::Type value_type = instruction->GetComponentType();
   4790 
   4791   bool needs_write_barrier =
   4792       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   4793   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   4794 
   4795   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
   4796       instruction,
   4797       may_need_runtime_call_for_type_check ?
   4798           LocationSummary::kCallOnSlowPath :
   4799           LocationSummary::kNoCall);
   4800 
   4801   locations->SetInAt(0, Location::RequiresRegister());
   4802   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4803   if (DataType::IsFloatingPointType(value_type)) {
   4804     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   4805   } else {
   4806     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   4807   }
   4808 
   4809   if (needs_write_barrier) {
   4810     // Temporary registers for the write barrier.
   4811     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
   4812     locations->AddTemp(Location::RequiresRegister());
   4813   }
   4814 }
   4815 
   4816 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
   4817   LocationSummary* locations = instruction->GetLocations();
   4818   Location array_loc = locations->InAt(0);
   4819   CpuRegister array = array_loc.AsRegister<CpuRegister>();
   4820   Location index = locations->InAt(1);
   4821   Location value = locations->InAt(2);
   4822   DataType::Type value_type = instruction->GetComponentType();
   4823   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   4824   bool needs_write_barrier =
   4825       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   4826   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   4827   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   4828   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   4829 
   4830   switch (value_type) {
   4831     case DataType::Type::kBool:
   4832     case DataType::Type::kUint8:
   4833     case DataType::Type::kInt8: {
   4834       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
   4835       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
   4836       if (value.IsRegister()) {
   4837         __ movb(address, value.AsRegister<CpuRegister>());
   4838       } else {
   4839         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
   4840       }
   4841       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4842       break;
   4843     }
   4844 
   4845     case DataType::Type::kUint16:
   4846     case DataType::Type::kInt16: {
   4847       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
   4848       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
   4849       if (value.IsRegister()) {
   4850         __ movw(address, value.AsRegister<CpuRegister>());
   4851       } else {
   4852         DCHECK(value.IsConstant()) << value;
   4853         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
   4854       }
   4855       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4856       break;
   4857     }
   4858 
   4859     case DataType::Type::kReference: {
   4860       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
   4861       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
   4862 
   4863       if (!value.IsRegister()) {
   4864         // Just setting null.
   4865         DCHECK(instruction->InputAt(2)->IsNullConstant());
   4866         DCHECK(value.IsConstant()) << value;
   4867         __ movl(address, Immediate(0));
   4868         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4869         DCHECK(!needs_write_barrier);
   4870         DCHECK(!may_need_runtime_call_for_type_check);
   4871         break;
   4872       }
   4873 
   4874       DCHECK(needs_write_barrier);
   4875       CpuRegister register_value = value.AsRegister<CpuRegister>();
   4876       // We cannot use a NearLabel for `done`, as its range may be too
   4877       // short when Baker read barriers are enabled.
   4878       Label done;
   4879       NearLabel not_null, do_put;
   4880       SlowPathCode* slow_path = nullptr;
   4881       Location temp_loc = locations->GetTemp(0);
   4882       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
   4883       if (may_need_runtime_call_for_type_check) {
   4884         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
   4885         codegen_->AddSlowPath(slow_path);
   4886         if (instruction->GetValueCanBeNull()) {
   4887           __ testl(register_value, register_value);
   4888           __ j(kNotEqual, &not_null);
   4889           __ movl(address, Immediate(0));
   4890           codegen_->MaybeRecordImplicitNullCheck(instruction);
   4891           __ jmp(&done);
   4892           __ Bind(&not_null);
   4893         }
   4894 
   4895         // Note that when Baker read barriers are enabled, the type
   4896         // checks are performed without read barriers.  This is fine,
   4897         // even in the case where a class object is in the from-space
   4898         // after the flip, as a comparison involving such a type would
   4899         // not produce a false positive; it may of course produce a
   4900         // false negative, in which case we would take the ArraySet
   4901         // slow path.
   4902 
   4903         // /* HeapReference<Class> */ temp = array->klass_
   4904         __ movl(temp, Address(array, class_offset));
   4905         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4906         __ MaybeUnpoisonHeapReference(temp);
   4907 
   4908         // /* HeapReference<Class> */ temp = temp->component_type_
   4909         __ movl(temp, Address(temp, component_offset));
   4910         // If heap poisoning is enabled, no need to unpoison `temp`
   4911         // nor the object reference in `register_value->klass`, as
   4912         // we are comparing two poisoned references.
   4913         __ cmpl(temp, Address(register_value, class_offset));
   4914 
   4915         if (instruction->StaticTypeOfArrayIsObjectArray()) {
   4916           __ j(kEqual, &do_put);
   4917           // If heap poisoning is enabled, the `temp` reference has
   4918           // not been unpoisoned yet; unpoison it now.
   4919           __ MaybeUnpoisonHeapReference(temp);
   4920 
   4921           // If heap poisoning is enabled, no need to unpoison the
   4922           // heap reference loaded below, as it is only used for a
   4923           // comparison with null.
   4924           __ cmpl(Address(temp, super_offset), Immediate(0));
   4925           __ j(kNotEqual, slow_path->GetEntryLabel());
   4926           __ Bind(&do_put);
   4927         } else {
   4928           __ j(kNotEqual, slow_path->GetEntryLabel());
   4929         }
   4930       }
   4931 
   4932       if (kPoisonHeapReferences) {
   4933         __ movl(temp, register_value);
   4934         __ PoisonHeapReference(temp);
   4935         __ movl(address, temp);
   4936       } else {
   4937         __ movl(address, register_value);
   4938       }
   4939       if (!may_need_runtime_call_for_type_check) {
   4940         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4941       }
   4942 
   4943       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
   4944       codegen_->MarkGCCard(
   4945           temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
   4946       __ Bind(&done);
   4947 
   4948       if (slow_path != nullptr) {
   4949         __ Bind(slow_path->GetExitLabel());
   4950       }
   4951 
   4952       break;
   4953     }
   4954 
   4955     case DataType::Type::kInt32: {
   4956       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
   4957       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
   4958       if (value.IsRegister()) {
   4959         __ movl(address, value.AsRegister<CpuRegister>());
   4960       } else {
   4961         DCHECK(value.IsConstant()) << value;
   4962         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4963         __ movl(address, Immediate(v));
   4964       }
   4965       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4966       break;
   4967     }
   4968 
   4969     case DataType::Type::kInt64: {
   4970       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
   4971       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
   4972       if (value.IsRegister()) {
   4973         __ movq(address, value.AsRegister<CpuRegister>());
   4974         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4975       } else {
   4976         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
   4977         Address address_high =
   4978             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
   4979         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
   4980       }
   4981       break;
   4982     }
   4983 
   4984     case DataType::Type::kFloat32: {
   4985       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
   4986       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
   4987       if (value.IsFpuRegister()) {
   4988         __ movss(address, value.AsFpuRegister<XmmRegister>());
   4989       } else {
   4990         DCHECK(value.IsConstant());
   4991         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
   4992         __ movl(address, Immediate(v));
   4993       }
   4994       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4995       break;
   4996     }
   4997 
   4998     case DataType::Type::kFloat64: {
   4999       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
   5000       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
   5001       if (value.IsFpuRegister()) {
   5002         __ movsd(address, value.AsFpuRegister<XmmRegister>());
   5003         codegen_->MaybeRecordImplicitNullCheck(instruction);
   5004       } else {
   5005         int64_t v =
   5006             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
   5007         Address address_high =
   5008             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
   5009         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
   5010       }
   5011       break;
   5012     }
   5013 
   5014     case DataType::Type::kUint32:
   5015     case DataType::Type::kUint64:
   5016     case DataType::Type::kVoid:
   5017       LOG(FATAL) << "Unreachable type " << instruction->GetType();
   5018       UNREACHABLE();
   5019   }
   5020 }
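         // Illustrative sketch (not generated code) of the fast path produced for the
         // kReference case above, written in the assembler-call style of this file;
         // `data_slot`, `not_null`, `do_put` etc. are placeholder names and poisoning /
         // read-barrier details are omitted:
         //   testl(value, value);                           // storing null?
         //   j(kNotEqual, not_null);
         //   movl(data_slot, Immediate(0));  jmp(done);     // null: no check, no barrier
         //   Bind(not_null);
         //   movl(temp, Address(array, class_offset));      // temp = array->klass_
         //   movl(temp, Address(temp, component_offset));   // temp = temp->component_type_
         //   cmpl(temp, Address(value, class_offset));      // same as value->klass_ ?
         //   j(kNotEqual, object_array_check_or_slow_path);
         //   Bind(do_put);
         //   movl(data_slot, value);                        // poisoned copy if enabled
         //   MarkGCCard(...);
         //   Bind(done);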
   5021 
   5022 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
   5023   LocationSummary* locations =
   5024       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
   5025   locations->SetInAt(0, Location::RequiresRegister());
   5026   if (!instruction->IsEmittedAtUseSite()) {
   5027     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   5028   }
   5029 }
   5030 
   5031 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
   5032   if (instruction->IsEmittedAtUseSite()) {
   5033     return;
   5034   }
   5035 
   5036   LocationSummary* locations = instruction->GetLocations();
   5037   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   5038   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   5039   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   5040   __ movl(out, Address(obj, offset));
   5041   codegen_->MaybeRecordImplicitNullCheck(instruction);
    5042   // Shift out the compression flag (stored in the least significant bit of the count field)
           // in case the array is String's array of char.
   5043   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
   5044     __ shrl(out, Immediate(1));
   5045   }
   5046 }
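         // Sketch of the encoding the shift above relies on (derived from the code here,
         // not an authoritative spec): with string compression, the String count field holds
         //   count = (length << 1) | compression_flag
         // so the length lives in the upper 31 bits and a single logical shift right by one
         // recovers it, whether the backing storage is 8-bit or 16-bit.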
   5047 
   5048 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
   5049   RegisterSet caller_saves = RegisterSet::Empty();
   5050   InvokeRuntimeCallingConvention calling_convention;
   5051   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   5052   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   5053   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
   5054   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   5055   HInstruction* length = instruction->InputAt(1);
   5056   if (!length->IsEmittedAtUseSite()) {
   5057     locations->SetInAt(1, Location::RegisterOrConstant(length));
   5058   }
   5059 }
   5060 
   5061 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
   5062   LocationSummary* locations = instruction->GetLocations();
   5063   Location index_loc = locations->InAt(0);
   5064   Location length_loc = locations->InAt(1);
   5065   SlowPathCode* slow_path =
   5066       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
   5067 
   5068   if (length_loc.IsConstant()) {
   5069     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
   5070     if (index_loc.IsConstant()) {
    5071       // BCE will remove the bounds check if we are guaranteed to pass.
   5072       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
   5073       if (index < 0 || index >= length) {
   5074         codegen_->AddSlowPath(slow_path);
   5075         __ jmp(slow_path->GetEntryLabel());
   5076       } else {
   5077         // Some optimization after BCE may have generated this, and we should not
   5078         // generate a bounds check if it is a valid range.
   5079       }
   5080       return;
   5081     }
   5082 
    5083     // The constant length must be the second operand of the compare, so the jump
             // condition is reversed (kAboveEqual here instead of kBelowEqual below).
   5084     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
   5085     __ cmpl(index_reg, Immediate(length));
   5086     codegen_->AddSlowPath(slow_path);
   5087     __ j(kAboveEqual, slow_path->GetEntryLabel());
   5088   } else {
   5089     HInstruction* array_length = instruction->InputAt(1);
   5090     if (array_length->IsEmittedAtUseSite()) {
   5091       // Address the length field in the array.
   5092       DCHECK(array_length->IsArrayLength());
   5093       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
   5094       Location array_loc = array_length->GetLocations()->InAt(0);
   5095       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
   5096       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
   5097         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
   5098         // the string compression flag) with the in-memory length and avoid the temporary.
   5099         CpuRegister length_reg = CpuRegister(TMP);
   5100         __ movl(length_reg, array_len);
   5101         codegen_->MaybeRecordImplicitNullCheck(array_length);
   5102         __ shrl(length_reg, Immediate(1));
   5103         codegen_->GenerateIntCompare(length_reg, index_loc);
   5104       } else {
    5105         // Checking the bound for the general case:
    5106         // an array of chars, or a String's char array when the compression feature is off.
   5107         if (index_loc.IsConstant()) {
   5108           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
   5109           __ cmpl(array_len, Immediate(value));
   5110         } else {
   5111           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
   5112         }
   5113         codegen_->MaybeRecordImplicitNullCheck(array_length);
   5114       }
   5115     } else {
   5116       codegen_->GenerateIntCompare(length_loc, index_loc);
   5117     }
   5118     codegen_->AddSlowPath(slow_path);
   5119     __ j(kBelowEqual, slow_path->GetEntryLabel());
   5120   }
   5121 }
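         // The two runtime compare-and-branch shapes emitted above, as a sketch in the
         // assembler-call style used in this file (illustration only):
         //   // length is a constant:
         //   cmpl(index_reg, Immediate(length));  j(kAboveEqual, slow_path);
         //   // the unsigned compare also sends a negative index to the slow path
         //   // length is in a register or still sitting in the array object:
         //   cmpl(length, index);  j(kBelowEqual, slow_path);   // i.e. length <= index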
   5122 
   5123 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
   5124                                      CpuRegister card,
   5125                                      CpuRegister object,
   5126                                      CpuRegister value,
   5127                                      bool value_can_be_null) {
   5128   NearLabel is_null;
   5129   if (value_can_be_null) {
   5130     __ testl(value, value);
   5131     __ j(kEqual, &is_null);
   5132   }
   5133   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
   5134                                         /* no_rip */ true));
   5135   __ movq(temp, object);
   5136   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
   5137   __ movb(Address(temp, card, TIMES_1, 0), card);
   5138   if (value_can_be_null) {
   5139     __ Bind(&is_null);
   5140   }
   5141 }
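         // What the three instructions above compute, as a sketch (this assumes the usual
         // biased card table setup; see the card table implementation for the authoritative
         // details):
         //   card = Thread::Current()->card_table_    // biased base, read via %gs
         //   temp = object >> kCardShift              // card index covering `object`
         //   *(card + temp) = low byte of `card`
         // The biased base is arranged so that its least significant byte equals the
         // "dirty" card value, which is why storing the base register's own low byte
         // marks the card dirty without loading an extra constant.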
   5142 
   5143 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   5144   LOG(FATAL) << "Unimplemented";
   5145 }
   5146 
   5147 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
   5148   if (instruction->GetNext()->IsSuspendCheck() &&
   5149       instruction->GetBlock()->GetLoopInformation() != nullptr) {
   5150     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
   5151     // The back edge will generate the suspend check.
   5152     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
   5153   }
   5154 
   5155   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
   5156 }
   5157 
   5158 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5159   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
   5160       instruction, LocationSummary::kCallOnSlowPath);
    5161   // In the suspend check slow path, usually there are no caller-save registers at all.
   5162   // If SIMD instructions are present, however, we force spilling all live SIMD
    5163   // registers in full width (since the runtime only saves/restores the lower part).
   5164   locations->SetCustomSlowPathCallerSaves(
   5165       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
   5166 }
   5167 
   5168 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5169   HBasicBlock* block = instruction->GetBlock();
   5170   if (block->GetLoopInformation() != nullptr) {
   5171     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
   5172     // The back edge will generate the suspend check.
   5173     return;
   5174   }
   5175   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
   5176     // The goto will generate the suspend check.
   5177     return;
   5178   }
   5179   GenerateSuspendCheck(instruction, nullptr);
   5180 }
   5181 
   5182 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
   5183                                                           HBasicBlock* successor) {
   5184   SuspendCheckSlowPathX86_64* slow_path =
   5185       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
   5186   if (slow_path == nullptr) {
   5187     slow_path =
   5188         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
   5189     instruction->SetSlowPath(slow_path);
   5190     codegen_->AddSlowPath(slow_path);
   5191     if (successor != nullptr) {
   5192       DCHECK(successor->IsLoopHeader());
   5193     }
   5194   } else {
   5195     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   5196   }
   5197 
   5198   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
   5199                                   /* no_rip */ true),
   5200                 Immediate(0));
   5201   if (successor == nullptr) {
   5202     __ j(kNotEqual, slow_path->GetEntryLabel());
   5203     __ Bind(slow_path->GetReturnLabel());
   5204   } else {
   5205     __ j(kEqual, codegen_->GetLabelOf(successor));
   5206     __ jmp(slow_path->GetEntryLabel());
   5207   }
   5208 }
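         // The two emission shapes above, sketched in assembler-call style:
         //   successor == nullptr (explicit check):     successor != nullptr (back edge):
         //     gs()->cmpw(thread_flags, Immediate(0));    gs()->cmpw(thread_flags, Immediate(0));
         //     j(kNotEqual, slow_path);                   j(kEqual, successor);
         //     Bind(return_label);                        jmp(slow_path);
         // In the first form the slow path comes back to `return_label`; in the second it
         // resumes at `successor` after the runtime call.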
   5209 
   5210 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
   5211   return codegen_->GetAssembler();
   5212 }
   5213 
   5214 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
   5215   MoveOperands* move = moves_[index];
   5216   Location source = move->GetSource();
   5217   Location destination = move->GetDestination();
   5218 
   5219   if (source.IsRegister()) {
   5220     if (destination.IsRegister()) {
   5221       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
   5222     } else if (destination.IsStackSlot()) {
   5223       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5224               source.AsRegister<CpuRegister>());
   5225     } else {
   5226       DCHECK(destination.IsDoubleStackSlot());
   5227       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5228               source.AsRegister<CpuRegister>());
   5229     }
   5230   } else if (source.IsStackSlot()) {
   5231     if (destination.IsRegister()) {
   5232       __ movl(destination.AsRegister<CpuRegister>(),
   5233               Address(CpuRegister(RSP), source.GetStackIndex()));
   5234     } else if (destination.IsFpuRegister()) {
   5235       __ movss(destination.AsFpuRegister<XmmRegister>(),
   5236               Address(CpuRegister(RSP), source.GetStackIndex()));
   5237     } else {
   5238       DCHECK(destination.IsStackSlot());
   5239       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   5240       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   5241     }
   5242   } else if (source.IsDoubleStackSlot()) {
   5243     if (destination.IsRegister()) {
   5244       __ movq(destination.AsRegister<CpuRegister>(),
   5245               Address(CpuRegister(RSP), source.GetStackIndex()));
   5246     } else if (destination.IsFpuRegister()) {
   5247       __ movsd(destination.AsFpuRegister<XmmRegister>(),
   5248                Address(CpuRegister(RSP), source.GetStackIndex()));
   5249     } else {
   5250       DCHECK(destination.IsDoubleStackSlot()) << destination;
   5251       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   5252       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   5253     }
   5254   } else if (source.IsSIMDStackSlot()) {
   5255     if (destination.IsFpuRegister()) {
   5256       __ movups(destination.AsFpuRegister<XmmRegister>(),
   5257                 Address(CpuRegister(RSP), source.GetStackIndex()));
   5258     } else {
   5259       DCHECK(destination.IsSIMDStackSlot());
   5260       size_t high = kX86_64WordSize;
   5261       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   5262       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   5263       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
   5264       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
   5265     }
   5266   } else if (source.IsConstant()) {
   5267     HConstant* constant = source.GetConstant();
   5268     if (constant->IsIntConstant() || constant->IsNullConstant()) {
   5269       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
   5270       if (destination.IsRegister()) {
   5271         if (value == 0) {
   5272           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
   5273         } else {
   5274           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
   5275         }
   5276       } else {
   5277         DCHECK(destination.IsStackSlot()) << destination;
   5278         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
   5279       }
   5280     } else if (constant->IsLongConstant()) {
   5281       int64_t value = constant->AsLongConstant()->GetValue();
   5282       if (destination.IsRegister()) {
   5283         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
   5284       } else {
   5285         DCHECK(destination.IsDoubleStackSlot()) << destination;
   5286         codegen_->Store64BitValueToStack(destination, value);
   5287       }
   5288     } else if (constant->IsFloatConstant()) {
   5289       float fp_value = constant->AsFloatConstant()->GetValue();
   5290       if (destination.IsFpuRegister()) {
   5291         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   5292         codegen_->Load32BitValue(dest, fp_value);
   5293       } else {
   5294         DCHECK(destination.IsStackSlot()) << destination;
   5295         Immediate imm(bit_cast<int32_t, float>(fp_value));
   5296         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
   5297       }
   5298     } else {
   5299       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
    5300       double fp_value = constant->AsDoubleConstant()->GetValue();
   5301       int64_t value = bit_cast<int64_t, double>(fp_value);
   5302       if (destination.IsFpuRegister()) {
   5303         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   5304         codegen_->Load64BitValue(dest, fp_value);
   5305       } else {
   5306         DCHECK(destination.IsDoubleStackSlot()) << destination;
   5307         codegen_->Store64BitValueToStack(destination, value);
   5308       }
   5309     }
   5310   } else if (source.IsFpuRegister()) {
   5311     if (destination.IsFpuRegister()) {
   5312       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
   5313     } else if (destination.IsStackSlot()) {
   5314       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5315                source.AsFpuRegister<XmmRegister>());
   5316     } else if (destination.IsDoubleStackSlot()) {
   5317       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5318                source.AsFpuRegister<XmmRegister>());
   5319     } else {
    5320       DCHECK(destination.IsSIMDStackSlot());
   5321       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5322                 source.AsFpuRegister<XmmRegister>());
   5323     }
   5324   }
   5325 }
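         // Aside on the zero-constant path above: xorl(reg, reg) is preferred over
         // movl(reg, Immediate(0)) because the encoding is shorter (2 bytes vs. 5) and
         // the CPU recognizes it as a dependency-breaking zeroing idiom, e.g.:
         //   31 c0             xor  eax, eax
         //   b8 00 00 00 00    mov  eax, 0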
   5326 
   5327 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
   5328   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5329   __ movl(Address(CpuRegister(RSP), mem), reg);
   5330   __ movl(reg, CpuRegister(TMP));
   5331 }
   5332 
   5333 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
   5334   __ movq(CpuRegister(TMP), reg1);
   5335   __ movq(reg1, reg2);
   5336   __ movq(reg2, CpuRegister(TMP));
   5337 }
   5338 
   5339 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
   5340   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5341   __ movq(Address(CpuRegister(RSP), mem), reg);
   5342   __ movq(reg, CpuRegister(TMP));
   5343 }
   5344 
   5345 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
   5346   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5347   __ movss(Address(CpuRegister(RSP), mem), reg);
   5348   __ movd(reg, CpuRegister(TMP));
   5349 }
   5350 
   5351 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
   5352   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5353   __ movsd(Address(CpuRegister(RSP), mem), reg);
   5354   __ movd(reg, CpuRegister(TMP));
   5355 }
   5356 
   5357 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
   5358   size_t extra_slot = 2 * kX86_64WordSize;
   5359   __ subq(CpuRegister(RSP), Immediate(extra_slot));
   5360   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
   5361   ExchangeMemory64(0, mem + extra_slot, 2);
   5362   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
   5363   __ addq(CpuRegister(RSP), Immediate(extra_slot));
   5364 }
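         // Exchange128 above swaps a 128-bit register with a 16-byte stack slot through a
         // freshly reserved scratch area (sketch):
         //   subq(RSP, Immediate(16));            // reserve scratch space
         //   movups(Address(RSP, 0), reg);        // spill the register
         //   ExchangeMemory64(0, mem + 16, 2);    // swap the two 16-byte memory blocks
         //   movups(reg, Address(RSP, 0));        // reload what used to be at `mem`
         //   addq(RSP, Immediate(16));
         // Note that `mem` is rebased by the extra slot because RSP has moved.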
   5365 
   5366 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
   5367   ScratchRegisterScope ensure_scratch(
   5368       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
   5369 
   5370   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
   5371   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
   5372   __ movl(CpuRegister(ensure_scratch.GetRegister()),
   5373           Address(CpuRegister(RSP), mem2 + stack_offset));
   5374   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
   5375   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
   5376           CpuRegister(ensure_scratch.GetRegister()));
   5377 }
   5378 
   5379 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
   5380   ScratchRegisterScope ensure_scratch(
   5381       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
   5382 
   5383   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
   5384 
   5385   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
   5386   for (int i = 0; i < num_of_qwords; i++) {
   5387     __ movq(CpuRegister(TMP),
   5388             Address(CpuRegister(RSP), mem1 + stack_offset));
   5389     __ movq(CpuRegister(ensure_scratch.GetRegister()),
   5390             Address(CpuRegister(RSP), mem2 + stack_offset));
   5391     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
   5392             CpuRegister(TMP));
   5393     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
   5394             CpuRegister(ensure_scratch.GetRegister()));
   5395     stack_offset += kX86_64WordSize;
   5396   }
   5397 }
   5398 
   5399 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   5400   MoveOperands* move = moves_[index];
   5401   Location source = move->GetSource();
   5402   Location destination = move->GetDestination();
   5403 
   5404   if (source.IsRegister() && destination.IsRegister()) {
   5405     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
   5406   } else if (source.IsRegister() && destination.IsStackSlot()) {
   5407     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   5408   } else if (source.IsStackSlot() && destination.IsRegister()) {
   5409     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   5410   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
   5411     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
   5412   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
   5413     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   5414   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
   5415     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   5416   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
   5417     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
   5418   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
   5419     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
   5420     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
   5421     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
   5422   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
   5423     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   5424   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
   5425     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   5426   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
   5427     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   5428   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
   5429     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   5430   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
   5431     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
   5432   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
   5433     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   5434   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
   5435     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   5436   } else {
   5437     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
   5438   }
   5439 }
   5440 
   5441 
   5442 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
   5443   __ pushq(CpuRegister(reg));
   5444 }
   5445 
   5446 
   5447 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
   5448   __ popq(CpuRegister(reg));
   5449 }
   5450 
   5451 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
   5452     SlowPathCode* slow_path, CpuRegister class_reg) {
   5453   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
   5454   const size_t status_byte_offset =
   5455       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
   5456   constexpr uint32_t shifted_initialized_value =
   5457       enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
   5458 
    5459   __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
   5460   __ j(kBelow, slow_path->GetEntryLabel());
   5461   __ Bind(slow_path->GetExitLabel());
   5462   // No need for memory fence, thanks to the x86-64 memory model.
   5463 }
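         // Worked example for the single-byte compare above (the concrete numbers are
         // illustrative, not guaranteed): the 32-bit status field packs the SubtypeCheck
         // bits in its low bits and the ClassStatus in the remaining high bits. If
         // status_lsb_position were 28, status_byte_offset would address the last byte of
         // the field and shifted_initialized_value would be kInitialized << 4, so one
         // unsigned cmpb/j(kBelow) decides "not yet initialized", and the result is
         // independent of any subtype-check bits sharing that byte.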
   5464 
   5465 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
   5466     HLoadClass::LoadKind desired_class_load_kind) {
   5467   switch (desired_class_load_kind) {
   5468     case HLoadClass::LoadKind::kInvalid:
   5469       LOG(FATAL) << "UNREACHABLE";
   5470       UNREACHABLE();
   5471     case HLoadClass::LoadKind::kReferrersClass:
   5472       break;
   5473     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
   5474     case HLoadClass::LoadKind::kBootImageClassTable:
   5475     case HLoadClass::LoadKind::kBssEntry:
   5476       DCHECK(!Runtime::Current()->UseJitCompilation());
   5477       break;
   5478     case HLoadClass::LoadKind::kJitTableAddress:
   5479       DCHECK(Runtime::Current()->UseJitCompilation());
   5480       break;
   5481     case HLoadClass::LoadKind::kBootImageAddress:
   5482     case HLoadClass::LoadKind::kRuntimeCall:
   5483       break;
   5484   }
   5485   return desired_class_load_kind;
   5486 }
   5487 
   5488 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
   5489   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   5490   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
   5491     // Custom calling convention: RAX serves as both input and output.
   5492     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
   5493         cls,
   5494         Location::RegisterLocation(RAX),
   5495         Location::RegisterLocation(RAX));
   5496     return;
   5497   }
   5498   DCHECK(!cls->NeedsAccessCheck());
   5499 
   5500   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   5501   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
   5502       ? LocationSummary::kCallOnSlowPath
   5503       : LocationSummary::kNoCall;
   5504   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
   5505   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
   5506     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   5507   }
   5508 
   5509   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
   5510     locations->SetInAt(0, Location::RequiresRegister());
   5511   }
   5512   locations->SetOut(Location::RequiresRegister());
   5513   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
   5514     if (!kUseReadBarrier || kUseBakerReadBarrier) {
   5515       // Rely on the type resolution and/or initialization to save everything.
   5516       // Custom calling convention: RAX serves as both input and output.
   5517       RegisterSet caller_saves = RegisterSet::Empty();
   5518       caller_saves.Add(Location::RegisterLocation(RAX));
   5519       locations->SetCustomSlowPathCallerSaves(caller_saves);
   5520     } else {
   5521       // For non-Baker read barrier we have a temp-clobbering call.
   5522     }
   5523   }
   5524 }
   5525 
   5526 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
   5527                                                  dex::TypeIndex type_index,
   5528                                                  Handle<mirror::Class> handle) {
   5529   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
   5530   // Add a patch entry and return the label.
   5531   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
   5532   PatchInfo<Label>* info = &jit_class_patches_.back();
   5533   return &info->label;
   5534 }
   5535 
   5536 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
   5537 // move.
   5538 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   5539   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   5540   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
   5541     codegen_->GenerateLoadClassRuntimeCall(cls);
   5542     return;
   5543   }
   5544   DCHECK(!cls->NeedsAccessCheck());
   5545 
   5546   LocationSummary* locations = cls->GetLocations();
   5547   Location out_loc = locations->Out();
   5548   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5549 
   5550   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
   5551       ? kWithoutReadBarrier
   5552       : kCompilerReadBarrierOption;
   5553   bool generate_null_check = false;
   5554   switch (load_kind) {
   5555     case HLoadClass::LoadKind::kReferrersClass: {
   5556       DCHECK(!cls->CanCallRuntime());
   5557       DCHECK(!cls->MustGenerateClinitCheck());
   5558       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
   5559       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
   5560       GenerateGcRootFieldLoad(
   5561           cls,
   5562           out_loc,
   5563           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
   5564           /* fixup_label */ nullptr,
   5565           read_barrier_option);
   5566       break;
   5567     }
   5568     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
   5569       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
   5570       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
   5571       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
   5572       codegen_->RecordBootImageTypePatch(cls);
   5573       break;
   5574     case HLoadClass::LoadKind::kBootImageAddress: {
   5575       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
   5576       uint32_t address = dchecked_integral_cast<uint32_t>(
   5577           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
   5578       DCHECK_NE(address, 0u);
   5579       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
   5580       break;
   5581     }
   5582     case HLoadClass::LoadKind::kBootImageClassTable: {
   5583       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
   5584       __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
   5585       codegen_->RecordBootImageTypePatch(cls);
   5586       // Extract the reference from the slot data, i.e. clear the hash bits.
   5587       int32_t masked_hash = ClassTable::TableSlot::MaskHash(
   5588           ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
   5589       if (masked_hash != 0) {
   5590         __ subl(out, Immediate(masked_hash));
   5591       }
   5592       break;
   5593     }
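             // Sketch of what the subtraction above undoes (an assumption about the slot
             // encoding rather than an authoritative description): a boot image class
             // table slot packs a small descriptor hash into the low alignment bits of
             // the class reference, roughly
             //   slot = reference | MaskHash(descriptor_hash)
             // Since that masked hash is known statically here, subtracting it (which is
             // equivalent to clearing those bits) yields the plain reference.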
   5594     case HLoadClass::LoadKind::kBssEntry: {
   5595       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5596                                           /* no_rip */ false);
   5597       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
   5598       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
   5599       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
   5600       generate_null_check = true;
   5601       break;
   5602     }
   5603     case HLoadClass::LoadKind::kJitTableAddress: {
   5604       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5605                                           /* no_rip */ true);
   5606       Label* fixup_label =
   5607           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
   5608       // /* GcRoot<mirror::Class> */ out = *address
   5609       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
   5610       break;
   5611     }
   5612     default:
   5613       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
   5614       UNREACHABLE();
   5615   }
   5616 
   5617   if (generate_null_check || cls->MustGenerateClinitCheck()) {
   5618     DCHECK(cls->CanCallRuntime());
   5619     SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(
   5620         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
   5621     codegen_->AddSlowPath(slow_path);
   5622     if (generate_null_check) {
   5623       __ testl(out, out);
   5624       __ j(kEqual, slow_path->GetEntryLabel());
   5625     }
   5626     if (cls->MustGenerateClinitCheck()) {
   5627       GenerateClassInitializationCheck(slow_path, out);
   5628     } else {
   5629       __ Bind(slow_path->GetExitLabel());
   5630     }
   5631   }
   5632 }
   5633 
   5634 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
   5635   LocationSummary* locations =
   5636       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
   5637   locations->SetInAt(0, Location::RequiresRegister());
   5638   if (check->HasUses()) {
   5639     locations->SetOut(Location::SameAsFirstInput());
   5640   }
   5641 }
   5642 
   5643 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
    5644   // We assume the class is not null.
   5645   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(
   5646       check->GetLoadClass(), check, check->GetDexPc(), true);
   5647   codegen_->AddSlowPath(slow_path);
   5648   GenerateClassInitializationCheck(slow_path,
   5649                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
   5650 }
   5651 
   5652 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
   5653     HLoadString::LoadKind desired_string_load_kind) {
   5654   switch (desired_string_load_kind) {
   5655     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
   5656     case HLoadString::LoadKind::kBootImageInternTable:
   5657     case HLoadString::LoadKind::kBssEntry:
   5658       DCHECK(!Runtime::Current()->UseJitCompilation());
   5659       break;
   5660     case HLoadString::LoadKind::kJitTableAddress:
   5661       DCHECK(Runtime::Current()->UseJitCompilation());
   5662       break;
   5663     case HLoadString::LoadKind::kBootImageAddress:
   5664     case HLoadString::LoadKind::kRuntimeCall:
   5665       break;
   5666   }
   5667   return desired_string_load_kind;
   5668 }
   5669 
   5670 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
   5671   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   5672   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
   5673   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
   5674     locations->SetOut(Location::RegisterLocation(RAX));
   5675   } else {
   5676     locations->SetOut(Location::RequiresRegister());
   5677     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
   5678       if (!kUseReadBarrier || kUseBakerReadBarrier) {
   5679         // Rely on the pResolveString to save everything.
   5680         // Custom calling convention: RAX serves as both input and output.
   5681         RegisterSet caller_saves = RegisterSet::Empty();
   5682         caller_saves.Add(Location::RegisterLocation(RAX));
   5683         locations->SetCustomSlowPathCallerSaves(caller_saves);
   5684       } else {
   5685         // For non-Baker read barrier we have a temp-clobbering call.
   5686       }
   5687     }
   5688   }
   5689 }
   5690 
   5691 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
   5692                                                   dex::StringIndex string_index,
   5693                                                   Handle<mirror::String> handle) {
   5694   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
   5695   // Add a patch entry and return the label.
   5696   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
   5697   PatchInfo<Label>* info = &jit_string_patches_.back();
   5698   return &info->label;
   5699 }
   5700 
   5701 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
   5702 // move.
   5703 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
   5704   LocationSummary* locations = load->GetLocations();
   5705   Location out_loc = locations->Out();
   5706   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5707 
   5708   switch (load->GetLoadKind()) {
   5709     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
   5710       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
   5711       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
   5712       codegen_->RecordBootImageStringPatch(load);
   5713       return;
   5714     }
   5715     case HLoadString::LoadKind::kBootImageAddress: {
   5716       uint32_t address = dchecked_integral_cast<uint32_t>(
   5717           reinterpret_cast<uintptr_t>(load->GetString().Get()));
   5718       DCHECK_NE(address, 0u);
   5719       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
   5720       return;
   5721     }
   5722     case HLoadString::LoadKind::kBootImageInternTable: {
   5723       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
   5724       __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
   5725       codegen_->RecordBootImageStringPatch(load);
   5726       return;
   5727     }
   5728     case HLoadString::LoadKind::kBssEntry: {
   5729       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5730                                           /* no_rip */ false);
   5731       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
   5732       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
   5733       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
   5734       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
   5735       codegen_->AddSlowPath(slow_path);
   5736       __ testl(out, out);
   5737       __ j(kEqual, slow_path->GetEntryLabel());
   5738       __ Bind(slow_path->GetExitLabel());
   5739       return;
   5740     }
   5741     case HLoadString::LoadKind::kJitTableAddress: {
   5742       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5743                                           /* no_rip */ true);
   5744       Label* fixup_label = codegen_->NewJitRootStringPatch(
   5745           load->GetDexFile(), load->GetStringIndex(), load->GetString());
   5746       // /* GcRoot<mirror::String> */ out = *address
   5747       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
   5748       return;
   5749     }
   5750     default:
   5751       break;
   5752   }
   5753 
    5754   // TODO: Re-add the compiler code to do the string dex cache lookup.
   5755   // Custom calling convention: RAX serves as both input and output.
   5756   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
   5757   codegen_->InvokeRuntime(kQuickResolveString,
   5758                           load,
   5759                           load->GetDexPc());
   5760   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
   5761 }
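         // Sketch of the kBssEntry path above (illustration): the string is loaded
         // PC-relatively from a .bss slot whose location is fixed up via the recorded
         // patch; an empty slot sends us to the resolution slow path, which fills it in:
         //   movl(out, Address::Absolute(bss_slot, /* no_rip */ false));  // patched
         //   testl(out, out);
         //   j(kEqual, slow_path);   // calls pResolveString and stores into the slot
         //   Bind(exit);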
   5762 
   5763 static Address GetExceptionTlsAddress() {
   5764   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
   5765                            /* no_rip */ true);
   5766 }
   5767 
   5768 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
   5769   LocationSummary* locations =
   5770       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
   5771   locations->SetOut(Location::RequiresRegister());
   5772 }
   5773 
   5774 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
   5775   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
   5776 }
   5777 
   5778 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
   5779   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
   5780 }
   5781 
   5782 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
   5783   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
   5784 }
   5785 
   5786 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
   5787   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
   5788       instruction, LocationSummary::kCallOnMainOnly);
   5789   InvokeRuntimeCallingConvention calling_convention;
   5790   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   5791 }
   5792 
   5793 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
   5794   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
   5795   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
   5796 }
   5797 
   5798 static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   5799   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
   5800     // We need a temporary for holding the iftable length.
   5801     return true;
   5802   }
   5803   return kEmitCompilerReadBarrier &&
   5804       !kUseBakerReadBarrier &&
   5805       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   5806        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   5807        type_check_kind == TypeCheckKind::kArrayObjectCheck);
   5808 }
   5809 
   5810 static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   5811   return kEmitCompilerReadBarrier &&
   5812       !kUseBakerReadBarrier &&
   5813       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   5814        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   5815        type_check_kind == TypeCheckKind::kArrayObjectCheck);
   5816 }
   5817 
   5818 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   5819   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   5820   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   5821   bool baker_read_barrier_slow_path = false;
   5822   switch (type_check_kind) {
   5823     case TypeCheckKind::kExactCheck:
   5824     case TypeCheckKind::kAbstractClassCheck:
   5825     case TypeCheckKind::kClassHierarchyCheck:
   5826     case TypeCheckKind::kArrayObjectCheck: {
   5827       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
   5828       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
   5829       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
   5830       break;
   5831     }
   5832     case TypeCheckKind::kArrayCheck:
   5833     case TypeCheckKind::kUnresolvedCheck:
   5834     case TypeCheckKind::kInterfaceCheck:
   5835       call_kind = LocationSummary::kCallOnSlowPath;
   5836       break;
   5837   }
   5838 
   5839   LocationSummary* locations =
   5840       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   5841   if (baker_read_barrier_slow_path) {
   5842     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   5843   }
   5844   locations->SetInAt(0, Location::RequiresRegister());
   5845   locations->SetInAt(1, Location::Any());
   5846   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
   5847   locations->SetOut(Location::RequiresRegister());
   5848   // When read barriers are enabled, we need a temporary register for
   5849   // some cases.
   5850   if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
   5851     locations->AddTemp(Location::RequiresRegister());
   5852   }
   5853 }
   5854 
   5855 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   5856   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   5857   LocationSummary* locations = instruction->GetLocations();
   5858   Location obj_loc = locations->InAt(0);
   5859   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   5860   Location cls = locations->InAt(1);
    5861   Location out_loc = locations->Out();
   5862   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5863   Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
   5864       locations->GetTemp(0) :
   5865       Location::NoLocation();
   5866   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   5867   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   5868   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   5869   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   5870   SlowPathCode* slow_path = nullptr;
   5871   NearLabel done, zero;
   5872 
   5873   // Return 0 if `obj` is null.
   5874   // Avoid null check if we know obj is not null.
   5875   if (instruction->MustDoNullCheck()) {
   5876     __ testl(obj, obj);
   5877     __ j(kEqual, &zero);
   5878   }
   5879 
   5880   switch (type_check_kind) {
   5881     case TypeCheckKind::kExactCheck: {
   5882       ReadBarrierOption read_barrier_option =
   5883           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
   5884       // /* HeapReference<Class> */ out = obj->klass_
   5885       GenerateReferenceLoadTwoRegisters(instruction,
   5886                                         out_loc,
   5887                                         obj_loc,
   5888                                         class_offset,
   5889                                         read_barrier_option);
   5890       if (cls.IsRegister()) {
   5891         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5892       } else {
   5893         DCHECK(cls.IsStackSlot()) << cls;
   5894         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5895       }
   5896       if (zero.IsLinked()) {
   5897         // Classes must be equal for the instanceof to succeed.
   5898         __ j(kNotEqual, &zero);
   5899         __ movl(out, Immediate(1));
   5900         __ jmp(&done);
   5901       } else {
   5902         __ setcc(kEqual, out);
   5903         // setcc only sets the low byte.
   5904         __ andl(out, Immediate(1));
   5905       }
   5906       break;
   5907     }
   5908 
   5909     case TypeCheckKind::kAbstractClassCheck: {
   5910       ReadBarrierOption read_barrier_option =
   5911           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
   5912       // /* HeapReference<Class> */ out = obj->klass_
   5913       GenerateReferenceLoadTwoRegisters(instruction,
   5914                                         out_loc,
   5915                                         obj_loc,
   5916                                         class_offset,
   5917                                         read_barrier_option);
   5918       // If the class is abstract, we eagerly fetch the super class of the
   5919       // object to avoid doing a comparison we know will fail.
    5920       NearLabel loop;
   5921       __ Bind(&loop);
   5922       // /* HeapReference<Class> */ out = out->super_class_
   5923       GenerateReferenceLoadOneRegister(instruction,
   5924                                        out_loc,
   5925                                        super_offset,
   5926                                        maybe_temp_loc,
   5927                                        read_barrier_option);
   5928       __ testl(out, out);
   5929       // If `out` is null, we use it for the result, and jump to `done`.
   5930       __ j(kEqual, &done);
   5931       if (cls.IsRegister()) {
   5932         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5933       } else {
   5934         DCHECK(cls.IsStackSlot()) << cls;
   5935         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5936       }
   5937       __ j(kNotEqual, &loop);
   5938       __ movl(out, Immediate(1));
   5939       if (zero.IsLinked()) {
   5940         __ jmp(&done);
   5941       }
   5942       break;
   5943     }
   5944 
   5945     case TypeCheckKind::kClassHierarchyCheck: {
   5946       ReadBarrierOption read_barrier_option =
   5947           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
   5948       // /* HeapReference<Class> */ out = obj->klass_
   5949       GenerateReferenceLoadTwoRegisters(instruction,
   5950                                         out_loc,
   5951                                         obj_loc,
   5952                                         class_offset,
   5953                                         read_barrier_option);
   5954       // Walk over the class hierarchy to find a match.
   5955       NearLabel loop, success;
   5956       __ Bind(&loop);
   5957       if (cls.IsRegister()) {
   5958         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5959       } else {
   5960         DCHECK(cls.IsStackSlot()) << cls;
   5961         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5962       }
   5963       __ j(kEqual, &success);
   5964       // /* HeapReference<Class> */ out = out->super_class_
   5965       GenerateReferenceLoadOneRegister(instruction,
   5966                                        out_loc,
   5967                                        super_offset,
   5968                                        maybe_temp_loc,
   5969                                        read_barrier_option);
   5970       __ testl(out, out);
   5971       __ j(kNotEqual, &loop);
   5972       // If `out` is null, we use it for the result, and jump to `done`.
   5973       __ jmp(&done);
   5974       __ Bind(&success);
   5975       __ movl(out, Immediate(1));
   5976       if (zero.IsLinked()) {
   5977         __ jmp(&done);
   5978       }
   5979       break;
   5980     }
   5981 
   5982     case TypeCheckKind::kArrayObjectCheck: {
   5983       ReadBarrierOption read_barrier_option =
   5984           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
   5985       // /* HeapReference<Class> */ out = obj->klass_
   5986       GenerateReferenceLoadTwoRegisters(instruction,
   5987                                         out_loc,
   5988                                         obj_loc,
   5989                                         class_offset,
   5990                                         read_barrier_option);
   5991       // Do an exact check.
   5992       NearLabel exact_check;
   5993       if (cls.IsRegister()) {
   5994         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5995       } else {
   5996         DCHECK(cls.IsStackSlot()) << cls;
   5997         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5998       }
   5999       __ j(kEqual, &exact_check);
   6000       // Otherwise, we need to check that the object's class is a non-primitive array.
   6001       // /* HeapReference<Class> */ out = out->component_type_
   6002       GenerateReferenceLoadOneRegister(instruction,
   6003                                        out_loc,
   6004                                        component_offset,
   6005                                        maybe_temp_loc,
   6006                                        read_barrier_option);
   6007       __ testl(out, out);
   6008       // If `out` is null, we use it for the result, and jump to `done`.
   6009       __ j(kEqual, &done);
   6010       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
   6011       __ j(kNotEqual, &zero);
   6012       __ Bind(&exact_check);
   6013       __ movl(out, Immediate(1));
   6014       __ jmp(&done);
   6015       break;
   6016     }
   6017 
   6018     case TypeCheckKind::kArrayCheck: {
   6019       // No read barrier since the slow path will retry upon failure.
   6020       // /* HeapReference<Class> */ out = obj->klass_
   6021       GenerateReferenceLoadTwoRegisters(instruction,
   6022                                         out_loc,
   6023                                         obj_loc,
   6024                                         class_offset,
   6025                                         kWithoutReadBarrier);
   6026       if (cls.IsRegister()) {
   6027         __ cmpl(out, cls.AsRegister<CpuRegister>());
   6028       } else {
   6029         DCHECK(cls.IsStackSlot()) << cls;
   6030         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6031       }
   6032       DCHECK(locations->OnlyCallsOnSlowPath());
   6033       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
   6034           instruction, /* is_fatal */ false);
   6035       codegen_->AddSlowPath(slow_path);
   6036       __ j(kNotEqual, slow_path->GetEntryLabel());
   6037       __ movl(out, Immediate(1));
   6038       if (zero.IsLinked()) {
   6039         __ jmp(&done);
   6040       }
   6041       break;
   6042     }
   6043 
   6044     case TypeCheckKind::kUnresolvedCheck:
   6045     case TypeCheckKind::kInterfaceCheck: {
    6046       // Note that we only ever call the runtime on the slow path, but
    6047       // we always go into the slow path for the unresolved and interface
    6048       // check cases.
   6049       //
   6050       // We cannot directly call the InstanceofNonTrivial runtime
   6051       // entry point without resorting to a type checking slow path
   6052       // here (i.e. by calling InvokeRuntime directly), as it would
    6053       // require assigning fixed registers for the inputs of this
   6054       // HInstanceOf instruction (following the runtime calling
   6055       // convention), which might be cluttered by the potential first
   6056       // read barrier emission at the beginning of this method.
   6057       //
   6058       // TODO: Introduce a new runtime entry point taking the object
   6059       // to test (instead of its class) as argument, and let it deal
   6060       // with the read barrier issues. This will let us refactor this
   6061       // case of the `switch` code as it was previously (with a direct
   6062       // call to the runtime not using a type checking slow path).
   6063       // This should also be beneficial for the other cases above.
   6064       DCHECK(locations->OnlyCallsOnSlowPath());
   6065       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
   6066           instruction, /* is_fatal */ false);
   6067       codegen_->AddSlowPath(slow_path);
   6068       __ jmp(slow_path->GetEntryLabel());
   6069       if (zero.IsLinked()) {
   6070         __ jmp(&done);
   6071       }
   6072       break;
   6073     }
   6074   }
   6075 
   6076   if (zero.IsLinked()) {
   6077     __ Bind(&zero);
   6078     __ xorl(out, out);
   6079   }
   6080 
   6081   if (done.IsLinked()) {
   6082     __ Bind(&done);
   6083   }
   6084 
   6085   if (slow_path != nullptr) {
   6086     __ Bind(slow_path->GetExitLabel());
   6087   }
   6088 }
   6089 
   6090 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
   6091   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   6092   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
   6093   LocationSummary* locations =
   6094       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   6095   locations->SetInAt(0, Location::RequiresRegister());
   6096   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
   6097     // Require a register for the interface check since there is a loop that compares the class to
   6098     // a memory address.
   6099     locations->SetInAt(1, Location::RequiresRegister());
   6100   } else {
   6101     locations->SetInAt(1, Location::Any());
   6102   }
   6103 
   6104   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
   6105   locations->AddTemp(Location::RequiresRegister());
   6106   // When read barriers are enabled, we need an additional temporary
   6107   // register for some cases.
   6108   if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
   6109     locations->AddTemp(Location::RequiresRegister());
   6110   }
   6111 }
   6112 
   6113 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
   6114   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   6115   LocationSummary* locations = instruction->GetLocations();
   6116   Location obj_loc = locations->InAt(0);
   6117   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   6118   Location cls = locations->InAt(1);
   6119   Location temp_loc = locations->GetTemp(0);
   6120   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
   6121   Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
   6122       locations->GetTemp(1) :
   6123       Location::NoLocation();
   6124   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   6125   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   6126   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   6127   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   6128   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
   6129   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
   6130   const uint32_t object_array_data_offset =
   6131       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
   6132 
   6133   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
   6134   SlowPathCode* type_check_slow_path =
   6135       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
   6136           instruction, is_type_check_slow_path_fatal);
   6137   codegen_->AddSlowPath(type_check_slow_path);
   6138 
   6139 
   6140   NearLabel done;
   6141   // Avoid null check if we know obj is not null.
   6142   if (instruction->MustDoNullCheck()) {
   6143     __ testl(obj, obj);
   6144     __ j(kEqual, &done);
   6145   }
   6146 
   6147   switch (type_check_kind) {
   6148     case TypeCheckKind::kExactCheck:
   6149     case TypeCheckKind::kArrayCheck: {
   6150       // /* HeapReference<Class> */ temp = obj->klass_
   6151       GenerateReferenceLoadTwoRegisters(instruction,
   6152                                         temp_loc,
   6153                                         obj_loc,
   6154                                         class_offset,
   6155                                         kWithoutReadBarrier);
   6156       if (cls.IsRegister()) {
   6157         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   6158       } else {
   6159         DCHECK(cls.IsStackSlot()) << cls;
   6160         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6161       }
   6162       // Jump to slow path for throwing the exception or doing a
   6163       // more involved array check.
   6164       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
   6165       break;
   6166     }
   6167 
   6168     case TypeCheckKind::kAbstractClassCheck: {
   6169       // /* HeapReference<Class> */ temp = obj->klass_
   6170       GenerateReferenceLoadTwoRegisters(instruction,
   6171                                         temp_loc,
   6172                                         obj_loc,
   6173                                         class_offset,
   6174                                         kWithoutReadBarrier);
   6175       // If the class is abstract, we eagerly fetch the super class of the
   6176       // object to avoid doing a comparison we know will fail.
   6177       NearLabel loop;
   6178       __ Bind(&loop);
   6179       // /* HeapReference<Class> */ temp = temp->super_class_
   6180       GenerateReferenceLoadOneRegister(instruction,
   6181                                        temp_loc,
   6182                                        super_offset,
   6183                                        maybe_temp2_loc,
   6184                                        kWithoutReadBarrier);
   6185 
   6186       // If the class reference currently in `temp` is null, jump to the slow path to throw the
   6187       // exception.
   6188       __ testl(temp, temp);
   6189       // Otherwise, compare the classes.
   6190       __ j(kZero, type_check_slow_path->GetEntryLabel());
   6191       if (cls.IsRegister()) {
   6192         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   6193       } else {
   6194         DCHECK(cls.IsStackSlot()) << cls;
   6195         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6196       }
   6197       __ j(kNotEqual, &loop);
   6198       break;
   6199     }
   6200 
   6201     case TypeCheckKind::kClassHierarchyCheck: {
   6202       // /* HeapReference<Class> */ temp = obj->klass_
   6203       GenerateReferenceLoadTwoRegisters(instruction,
   6204                                         temp_loc,
   6205                                         obj_loc,
   6206                                         class_offset,
   6207                                         kWithoutReadBarrier);
   6208       // Walk over the class hierarchy to find a match.
   6209       NearLabel loop;
   6210       __ Bind(&loop);
   6211       if (cls.IsRegister()) {
   6212         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   6213       } else {
   6214         DCHECK(cls.IsStackSlot()) << cls;
   6215         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6216       }
   6217       __ j(kEqual, &done);
   6218 
   6219       // /* HeapReference<Class> */ temp = temp->super_class_
   6220       GenerateReferenceLoadOneRegister(instruction,
   6221                                        temp_loc,
   6222                                        super_offset,
   6223                                        maybe_temp2_loc,
   6224                                        kWithoutReadBarrier);
   6225 
   6226       // If the class reference currently in `temp` is not null, jump
    6227       // back to the beginning of the loop.
   6228       __ testl(temp, temp);
   6229       __ j(kNotZero, &loop);
   6230       // Otherwise, jump to the slow path to throw the exception.
   6231       __ jmp(type_check_slow_path->GetEntryLabel());
   6232       break;
   6233     }
   6234 
   6235     case TypeCheckKind::kArrayObjectCheck: {
   6236       // /* HeapReference<Class> */ temp = obj->klass_
   6237       GenerateReferenceLoadTwoRegisters(instruction,
   6238                                         temp_loc,
   6239                                         obj_loc,
   6240                                         class_offset,
   6241                                         kWithoutReadBarrier);
   6242       // Do an exact check.
   6243       NearLabel check_non_primitive_component_type;
   6244       if (cls.IsRegister()) {
   6245         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   6246       } else {
   6247         DCHECK(cls.IsStackSlot()) << cls;
   6248         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   6249       }
   6250       __ j(kEqual, &done);
   6251 
   6252       // Otherwise, we need to check that the object's class is a non-primitive array.
   6253       // /* HeapReference<Class> */ temp = temp->component_type_
   6254       GenerateReferenceLoadOneRegister(instruction,
   6255                                        temp_loc,
   6256                                        component_offset,
   6257                                        maybe_temp2_loc,
   6258                                        kWithoutReadBarrier);
   6259 
    6260       // If the component type reference currently in `temp` is null, the
    6261       // object is not an array: jump to the slow path to throw the
    6262       // exception.
    6263       __ testl(temp, temp);
    6264       __ j(kZero, type_check_slow_path->GetEntryLabel());
    6265       // Otherwise, check that this component type is not a primitive
    6266       // type.
   6267       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
   6268       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
   6269       break;
   6270     }
   6271 
   6272     case TypeCheckKind::kUnresolvedCheck: {
   6273       // We always go into the type check slow path for the unresolved case.
   6274       //
   6275       // We cannot directly call the CheckCast runtime entry point
   6276       // without resorting to a type checking slow path here (i.e. by
   6277       // calling InvokeRuntime directly), as it would require to
    6278       // calling InvokeRuntime directly), as it would require assigning
    6279       // fixed registers for the inputs of this HCheckCast
   6280       // might be cluttered by the potential first read barrier
   6281       // emission at the beginning of this method.
   6282       __ jmp(type_check_slow_path->GetEntryLabel());
   6283       break;
   6284     }
   6285 
   6286     case TypeCheckKind::kInterfaceCheck:
   6287       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
    6288       // We cannot get false positives by doing this.
   6289       // /* HeapReference<Class> */ temp = obj->klass_
   6290       GenerateReferenceLoadTwoRegisters(instruction,
   6291                                         temp_loc,
   6292                                         obj_loc,
   6293                                         class_offset,
   6294                                         kWithoutReadBarrier);
   6295 
   6296       // /* HeapReference<Class> */ temp = temp->iftable_
   6297       GenerateReferenceLoadTwoRegisters(instruction,
   6298                                         temp_loc,
   6299                                         temp_loc,
   6300                                         iftable_offset,
   6301                                         kWithoutReadBarrier);
   6302       // Iftable is never null.
   6303       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
   6304       // Maybe poison the `cls` for direct comparison with memory.
   6305       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
   6306       // Loop through the iftable and check if any class matches.
   6307       NearLabel start_loop;
   6308       __ Bind(&start_loop);
   6309       // Need to subtract first to handle the empty array case.
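               // Each iftable entry occupies two consecutive slots (the interface
               // class and its method array), so the count steps down by 2 and, once
               // decremented, doubles as the index of the current entry's class slot.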
   6310       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
   6311       __ j(kNegative, type_check_slow_path->GetEntryLabel());
   6312       // Go to next interface if the classes do not match.
   6313       __ cmpl(cls.AsRegister<CpuRegister>(),
   6314               CodeGeneratorX86_64::ArrayAddress(temp,
   6315                                                 maybe_temp2_loc,
   6316                                                 TIMES_4,
   6317                                                 object_array_data_offset));
    6318       __ j(kNotEqual, &start_loop);  // Fall through (success) if the classes match.
   6319       // If `cls` was poisoned above, unpoison it.
   6320       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
   6321       break;
   6322   }
   6323 
   6324   if (done.IsLinked()) {
   6325     __ Bind(&done);
   6326   }
   6327 
   6328   __ Bind(type_check_slow_path->GetExitLabel());
   6329 }
   6330 
   6331 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   6332   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
   6333       instruction, LocationSummary::kCallOnMainOnly);
   6334   InvokeRuntimeCallingConvention calling_convention;
   6335   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   6336 }
   6337 
   6338 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   6339   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
   6340                           instruction,
   6341                           instruction->GetDexPc());
   6342   if (instruction->IsEnter()) {
   6343     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
   6344   } else {
   6345     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   6346   }
   6347 }
   6348 
   6349 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
   6350 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
   6351 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
   6352 
   6353 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
   6354   LocationSummary* locations =
   6355       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
   6356   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
   6357          || instruction->GetResultType() == DataType::Type::kInt64);
   6358   locations->SetInAt(0, Location::RequiresRegister());
   6359   locations->SetInAt(1, Location::Any());
   6360   locations->SetOut(Location::SameAsFirstInput());
   6361 }
   6362 
   6363 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
   6364   HandleBitwiseOperation(instruction);
   6365 }
   6366 
   6367 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
   6368   HandleBitwiseOperation(instruction);
   6369 }
   6370 
   6371 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
   6372   HandleBitwiseOperation(instruction);
   6373 }
   6374 
   6375 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
   6376   LocationSummary* locations = instruction->GetLocations();
   6377   Location first = locations->InAt(0);
   6378   Location second = locations->InAt(1);
   6379   DCHECK(first.Equals(locations->Out()));
   6380 
   6381   if (instruction->GetResultType() == DataType::Type::kInt32) {
   6382     if (second.IsRegister()) {
   6383       if (instruction->IsAnd()) {
   6384         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6385       } else if (instruction->IsOr()) {
   6386         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6387       } else {
   6388         DCHECK(instruction->IsXor());
   6389         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6390       }
   6391     } else if (second.IsConstant()) {
   6392       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
   6393       if (instruction->IsAnd()) {
   6394         __ andl(first.AsRegister<CpuRegister>(), imm);
   6395       } else if (instruction->IsOr()) {
   6396         __ orl(first.AsRegister<CpuRegister>(), imm);
   6397       } else {
   6398         DCHECK(instruction->IsXor());
   6399         __ xorl(first.AsRegister<CpuRegister>(), imm);
   6400       }
   6401     } else {
   6402       Address address(CpuRegister(RSP), second.GetStackIndex());
   6403       if (instruction->IsAnd()) {
   6404         __ andl(first.AsRegister<CpuRegister>(), address);
   6405       } else if (instruction->IsOr()) {
   6406         __ orl(first.AsRegister<CpuRegister>(), address);
   6407       } else {
   6408         DCHECK(instruction->IsXor());
   6409         __ xorl(first.AsRegister<CpuRegister>(), address);
   6410       }
   6411     }
   6412   } else {
   6413     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
   6414     CpuRegister first_reg = first.AsRegister<CpuRegister>();
   6415     bool second_is_constant = false;
   6416     int64_t value = 0;
   6417     if (second.IsConstant()) {
   6418       second_is_constant = true;
   6419       value = second.GetConstant()->AsLongConstant()->GetValue();
   6420     }
   6421     bool is_int32_value = IsInt<32>(value);
   6422 
   6423     if (instruction->IsAnd()) {
   6424       if (second_is_constant) {
   6425         if (is_int32_value) {
   6426           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
   6427         } else {
   6428           __ andq(first_reg, codegen_->LiteralInt64Address(value));
   6429         }
   6430       } else if (second.IsDoubleStackSlot()) {
   6431         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6432       } else {
   6433         __ andq(first_reg, second.AsRegister<CpuRegister>());
   6434       }
   6435     } else if (instruction->IsOr()) {
   6436       if (second_is_constant) {
   6437         if (is_int32_value) {
   6438           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
   6439         } else {
   6440           __ orq(first_reg, codegen_->LiteralInt64Address(value));
   6441         }
   6442       } else if (second.IsDoubleStackSlot()) {
   6443         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6444       } else {
   6445         __ orq(first_reg, second.AsRegister<CpuRegister>());
   6446       }
   6447     } else {
   6448       DCHECK(instruction->IsXor());
   6449       if (second_is_constant) {
   6450         if (is_int32_value) {
   6451           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
   6452         } else {
   6453           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
   6454         }
   6455       } else if (second.IsDoubleStackSlot()) {
   6456         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6457       } else {
   6458         __ xorq(first_reg, second.AsRegister<CpuRegister>());
   6459       }
   6460     }
   6461   }
   6462 }
   6463 
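         // Load the heap reference at `*(out + offset)` into `out`, emitting a read
         // barrier if requested. For slow-path (non-Baker) read barriers, `maybe_temp`
         // is used to preserve the original value of `out` for the barrier.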
   6464 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
   6465     HInstruction* instruction,
   6466     Location out,
   6467     uint32_t offset,
   6468     Location maybe_temp,
   6469     ReadBarrierOption read_barrier_option) {
   6470   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   6471   if (read_barrier_option == kWithReadBarrier) {
   6472     CHECK(kEmitCompilerReadBarrier);
   6473     if (kUseBakerReadBarrier) {
   6474       // Load with fast path based Baker's read barrier.
   6475       // /* HeapReference<Object> */ out = *(out + offset)
   6476       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   6477           instruction, out, out_reg, offset, /* needs_null_check */ false);
   6478     } else {
   6479       // Load with slow path based read barrier.
   6480       // Save the value of `out` into `maybe_temp` before overwriting it
   6481       // in the following move operation, as we will need it for the
   6482       // read barrier below.
   6483       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
   6484       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
   6485       // /* HeapReference<Object> */ out = *(out + offset)
   6486       __ movl(out_reg, Address(out_reg, offset));
   6487       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
   6488     }
   6489   } else {
   6490     // Plain load with no read barrier.
   6491     // /* HeapReference<Object> */ out = *(out + offset)
   6492     __ movl(out_reg, Address(out_reg, offset));
   6493     __ MaybeUnpoisonHeapReference(out_reg);
   6494   }
   6495 }
   6496 
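         // Load the heap reference at `*(obj + offset)` into `out`. Since the source
         // object and the destination register differ, no temporary is needed to
         // preserve the input for a slow-path read barrier.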
   6497 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
   6498     HInstruction* instruction,
   6499     Location out,
   6500     Location obj,
   6501     uint32_t offset,
   6502     ReadBarrierOption read_barrier_option) {
   6503   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   6504   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
   6505   if (read_barrier_option == kWithReadBarrier) {
   6506     CHECK(kEmitCompilerReadBarrier);
   6507     if (kUseBakerReadBarrier) {
   6508       // Load with fast path based Baker's read barrier.
   6509       // /* HeapReference<Object> */ out = *(obj + offset)
   6510       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   6511           instruction, out, obj_reg, offset, /* needs_null_check */ false);
   6512     } else {
   6513       // Load with slow path based read barrier.
   6514       // /* HeapReference<Object> */ out = *(obj + offset)
   6515       __ movl(out_reg, Address(obj_reg, offset));
   6516       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
   6517     }
   6518   } else {
   6519     // Plain load with no read barrier.
   6520     // /* HeapReference<Object> */ out = *(obj + offset)
   6521     __ movl(out_reg, Address(obj_reg, offset));
   6522     __ MaybeUnpoisonHeapReference(out_reg);
   6523   }
   6524 }
   6525 
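         // Load the GC root at `address` into `root`, emitting a read barrier if
         // requested. When non-null, `fixup_label` is bound immediately after the load
         // instruction (used by callers that record patch information for this root).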
   6526 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
   6527     HInstruction* instruction,
   6528     Location root,
   6529     const Address& address,
   6530     Label* fixup_label,
   6531     ReadBarrierOption read_barrier_option) {
   6532   CpuRegister root_reg = root.AsRegister<CpuRegister>();
   6533   if (read_barrier_option == kWithReadBarrier) {
   6534     DCHECK(kEmitCompilerReadBarrier);
   6535     if (kUseBakerReadBarrier) {
   6536       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
   6537       // Baker's read barrier are used:
    6538       // Baker's read barriers are used:
   6539       //   root = obj.field;
   6540       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
   6541       //   if (temp != null) {
   6542       //     root = temp(root)
   6543       //   }
   6544 
   6545       // /* GcRoot<mirror::Object> */ root = *address
   6546       __ movl(root_reg, address);
   6547       if (fixup_label != nullptr) {
   6548         __ Bind(fixup_label);
   6549       }
   6550       static_assert(
   6551           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
   6552           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
   6553           "have different sizes.");
   6554       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
   6555                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
   6556                     "have different sizes.");
   6557 
   6558       // Slow path marking the GC root `root`.
   6559       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
   6560           instruction, root, /* unpoison_ref_before_marking */ false);
   6561       codegen_->AddSlowPath(slow_path);
   6562 
   6563       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
   6564       const int32_t entry_point_offset =
   6565           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
   6566       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
   6567       // The entrypoint is null when the GC is not marking.
   6568       __ j(kNotEqual, slow_path->GetEntryLabel());
   6569       __ Bind(slow_path->GetExitLabel());
   6570     } else {
   6571       // GC root loaded through a slow path for read barriers other
   6572       // than Baker's.
   6573       // /* GcRoot<mirror::Object>* */ root = address
   6574       __ leaq(root_reg, address);
   6575       if (fixup_label != nullptr) {
   6576         __ Bind(fixup_label);
   6577       }
   6578       // /* mirror::Object* */ root = root->Read()
   6579       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
   6580     }
   6581   } else {
   6582     // Plain GC root load with no read barrier.
   6583     // /* GcRoot<mirror::Object> */ root = *address
   6584     __ movl(root_reg, address);
   6585     if (fixup_label != nullptr) {
   6586       __ Bind(fixup_label);
   6587     }
   6588     // Note that GC roots are not affected by heap poisoning, thus we
   6589     // do not have to unpoison `root_reg` here.
   6590   }
   6591 }
   6592 
   6593 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
   6594                                                                 Location ref,
   6595                                                                 CpuRegister obj,
   6596                                                                 uint32_t offset,
   6597                                                                 bool needs_null_check) {
   6598   DCHECK(kEmitCompilerReadBarrier);
   6599   DCHECK(kUseBakerReadBarrier);
   6600 
   6601   // /* HeapReference<Object> */ ref = *(obj + offset)
   6602   Address src(obj, offset);
   6603   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
   6604 }
   6605 
   6606 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
   6607                                                                 Location ref,
   6608                                                                 CpuRegister obj,
   6609                                                                 uint32_t data_offset,
   6610                                                                 Location index,
   6611                                                                 bool needs_null_check) {
   6612   DCHECK(kEmitCompilerReadBarrier);
   6613   DCHECK(kUseBakerReadBarrier);
   6614 
   6615   static_assert(
   6616       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   6617       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   6618   // /* HeapReference<Object> */ ref =
   6619   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   6620   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
   6621   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
   6622 }
   6623 
   6624 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
   6625                                                                     Location ref,
   6626                                                                     CpuRegister obj,
   6627                                                                     const Address& src,
   6628                                                                     bool needs_null_check,
   6629                                                                     bool always_update_field,
   6630                                                                     CpuRegister* temp1,
   6631                                                                     CpuRegister* temp2) {
   6632   DCHECK(kEmitCompilerReadBarrier);
   6633   DCHECK(kUseBakerReadBarrier);
   6634 
   6635   // In slow path based read barriers, the read barrier call is
   6636   // inserted after the original load. However, in fast path based
   6637   // Baker's read barriers, we need to perform the load of
   6638   // mirror::Object::monitor_ *before* the original reference load.
   6639   // This load-load ordering is required by the read barrier.
   6640   // The fast path/slow path (for Baker's algorithm) should look like:
   6641   //
   6642   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   6643   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   6644   //   HeapReference<Object> ref = *src;  // Original reference load.
   6645   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   6646   //   if (is_gray) {
   6647   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   6648   //   }
   6649   //
   6650   // Note: the original implementation in ReadBarrier::Barrier is
   6651   // slightly more complex as:
   6652   // - it implements the load-load fence using a data dependency on
   6653   //   the high-bits of rb_state, which are expected to be all zeroes
   6654   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
   6655   //   here, which is a no-op thanks to the x86-64 memory model);
   6656   // - it performs additional checks that we do not do here for
   6657   //   performance reasons.
   6658 
   6659   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
   6660   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   6661 
   6662   // Given the numeric representation, it's enough to check the low bit of the rb_state.
   6663   static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   6664   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   6665   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   6666   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   6667   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
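           // `test_value` is the single-byte mask for the gray bit; the `testb` below
           // reads only the byte of the lock word that holds the read barrier state.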
   6668 
   6669   // if (rb_state == ReadBarrier::GrayState())
   6670   //   ref = ReadBarrier::Mark(ref);
   6671   // At this point, just do the "if" and make sure that flags are preserved until the branch.
   6672   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
   6673   if (needs_null_check) {
   6674     MaybeRecordImplicitNullCheck(instruction);
   6675   }
   6676 
   6677   // Load fence to prevent load-load reordering.
   6678   // Note that this is a no-op, thanks to the x86-64 memory model.
   6679   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   6680 
   6681   // The actual reference load.
   6682   // /* HeapReference<Object> */ ref = *src
   6683   __ movl(ref_reg, src);  // Flags are unaffected.
   6684 
    6685   // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
   6686   // Slow path marking the object `ref` when it is gray.
   6687   SlowPathCode* slow_path;
   6688   if (always_update_field) {
   6689     DCHECK(temp1 != nullptr);
   6690     DCHECK(temp2 != nullptr);
   6691     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
   6692         instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
   6693   } else {
   6694     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
   6695         instruction, ref, /* unpoison_ref_before_marking */ true);
   6696   }
   6697   AddSlowPath(slow_path);
   6698 
   6699   // We have done the "if" of the gray bit check above, now branch based on the flags.
   6700   __ j(kNotZero, slow_path->GetEntryLabel());
   6701 
   6702   // Object* ref = ref_addr->AsMirrorPtr()
   6703   __ MaybeUnpoisonHeapReference(ref_reg);
   6704 
   6705   __ Bind(slow_path->GetExitLabel());
   6706 }
   6707 
   6708 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
   6709                                                   Location out,
   6710                                                   Location ref,
   6711                                                   Location obj,
   6712                                                   uint32_t offset,
   6713                                                   Location index) {
   6714   DCHECK(kEmitCompilerReadBarrier);
   6715 
   6716   // Insert a slow path based read barrier *after* the reference load.
   6717   //
   6718   // If heap poisoning is enabled, the unpoisoning of the loaded
   6719   // reference will be carried out by the runtime within the slow
   6720   // path.
   6721   //
   6722   // Note that `ref` currently does not get unpoisoned (when heap
   6723   // poisoning is enabled), which is alright as the `ref` argument is
   6724   // not used by the artReadBarrierSlow entry point.
   6725   //
   6726   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
   6727   SlowPathCode* slow_path = new (GetScopedAllocator())
   6728       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
   6729   AddSlowPath(slow_path);
   6730 
   6731   __ jmp(slow_path->GetEntryLabel());
   6732   __ Bind(slow_path->GetExitLabel());
   6733 }
   6734 
   6735 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
   6736                                                        Location out,
   6737                                                        Location ref,
   6738                                                        Location obj,
   6739                                                        uint32_t offset,
   6740                                                        Location index) {
   6741   if (kEmitCompilerReadBarrier) {
   6742     // Baker's read barriers shall be handled by the fast path
   6743     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
   6744     DCHECK(!kUseBakerReadBarrier);
   6745     // If heap poisoning is enabled, unpoisoning will be taken care of
   6746     // by the runtime within the slow path.
   6747     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   6748   } else if (kPoisonHeapReferences) {
   6749     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
   6750   }
   6751 }
   6752 
   6753 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
   6754                                                          Location out,
   6755                                                          Location root) {
   6756   DCHECK(kEmitCompilerReadBarrier);
   6757 
   6758   // Insert a slow path based read barrier *after* the GC root load.
   6759   //
   6760   // Note that GC roots are not affected by heap poisoning, so we do
   6761   // not need to do anything special for this here.
   6762   SlowPathCode* slow_path =
   6763       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
   6764   AddSlowPath(slow_path);
   6765 
   6766   __ jmp(slow_path->GetEntryLabel());
   6767   __ Bind(slow_path->GetExitLabel());
   6768 }
   6769 
   6770 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   6771   // Nothing to do, this should be removed during prepare for register allocator.
   6772   LOG(FATAL) << "Unreachable";
   6773 }
   6774 
   6775 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   6776   // Nothing to do, this should be removed during prepare for register allocator.
   6777   LOG(FATAL) << "Unreachable";
   6778 }
   6779 
   6780 // Simple implementation of packed switch - generate cascaded compare/jumps.
   6781 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   6782   LocationSummary* locations =
   6783       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   6784   locations->SetInAt(0, Location::RequiresRegister());
   6785   locations->AddTemp(Location::RequiresRegister());
   6786   locations->AddTemp(Location::RequiresRegister());
   6787 }
   6788 
   6789 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   6790   int32_t lower_bound = switch_instr->GetStartValue();
   6791   uint32_t num_entries = switch_instr->GetNumEntries();
   6792   LocationSummary* locations = switch_instr->GetLocations();
   6793   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   6794   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   6795   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
   6796   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   6797 
   6798   // Should we generate smaller inline compare/jumps?
   6799   if (num_entries <= kPackedSwitchJumpTableThreshold) {
   6800     // Figure out the correct compare values and jump conditions.
   6801     // Handle the first compare/branch as a special case because it might
   6802     // jump to the default case.
   6803     DCHECK_GT(num_entries, 2u);
   6804     Condition first_condition;
   6805     uint32_t index;
   6806     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   6807     if (lower_bound != 0) {
   6808       first_condition = kLess;
   6809       __ cmpl(value_reg_in, Immediate(lower_bound));
   6810       __ j(first_condition, codegen_->GetLabelOf(default_block));
   6811       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
   6812 
   6813       index = 1;
   6814     } else {
   6815       // Handle all the compare/jumps below.
   6816       first_condition = kBelow;
   6817       index = 0;
   6818     }
   6819 
   6820     // Handle the rest of the compare/jumps.
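             // Each iteration emits one cmpl that dispatches two consecutive case
             // values: a `first_condition` branch for case `index` and an equality
             // branch for case `index + 1`.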
   6821     for (; index + 1 < num_entries; index += 2) {
   6822       int32_t compare_to_value = lower_bound + index + 1;
   6823       __ cmpl(value_reg_in, Immediate(compare_to_value));
   6824       // Jump to successors[index] if value < case_value[index].
   6825       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
   6826       // Jump to successors[index + 1] if value == case_value[index + 1].
   6827       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
   6828     }
   6829 
   6830     if (index != num_entries) {
    6831       // There is an odd number of entries. Handle the last one.
   6832       DCHECK_EQ(index + 1, num_entries);
   6833       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
   6834       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
   6835     }
   6836 
   6837     // And the default for any other value.
   6838     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
   6839       __ jmp(codegen_->GetLabelOf(default_block));
   6840     }
   6841     return;
   6842   }
   6843 
   6844   // Remove the bias, if needed.
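           // `leal` computes `value - lower_bound` into the temp register without
           // modifying the input register.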
   6845   Register value_reg_out = value_reg_in.AsRegister();
   6846   if (lower_bound != 0) {
   6847     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
   6848     value_reg_out = temp_reg.AsRegister();
   6849   }
   6850   CpuRegister value_reg(value_reg_out);
   6851 
   6852   // Is the value in range?
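           // The unsigned comparison below also catches negative (biased) values,
           // which wrap around to large unsigned numbers and go to the default block.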
   6853   __ cmpl(value_reg, Immediate(num_entries - 1));
   6854   __ j(kAbove, codegen_->GetLabelOf(default_block));
   6855 
   6856   // We are in the range of the table.
   6857   // Load the address of the jump table in the constant area.
   6858   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
   6859 
   6860   // Load the (signed) offset from the jump table.
   6861   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
   6862 
   6863   // Add the offset to the address of the table base.
   6864   __ addq(temp_reg, base_reg);
   6865 
   6866   // And jump.
   6867   __ jmp(temp_reg);
   6868 }
   6869 
   6870 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
   6871                                                       ATTRIBUTE_UNUSED) {
   6872   LOG(FATAL) << "Unreachable";
   6873 }
   6874 
   6875 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
   6876                                                               ATTRIBUTE_UNUSED) {
   6877   LOG(FATAL) << "Unreachable";
   6878 }
   6879 
   6880 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
   6881   if (value == 0) {
   6882     __ xorl(dest, dest);
   6883   } else {
   6884     __ movl(dest, Immediate(value));
   6885   }
   6886 }
   6887 
   6888 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
   6889   if (value == 0) {
   6890     // Clears upper bits too.
   6891     __ xorl(dest, dest);
   6892   } else if (IsUint<32>(value)) {
   6893     // We can use a 32 bit move, as it will zero-extend and is shorter.
   6894     __ movl(dest, Immediate(static_cast<int32_t>(value)));
   6895   } else {
   6896     __ movq(dest, Immediate(value));
   6897   }
   6898 }
   6899 
   6900 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
   6901   if (value == 0) {
   6902     __ xorps(dest, dest);
   6903   } else {
   6904     __ movss(dest, LiteralInt32Address(value));
   6905   }
   6906 }
   6907 
   6908 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
   6909   if (value == 0) {
   6910     __ xorpd(dest, dest);
   6911   } else {
   6912     __ movsd(dest, LiteralInt64Address(value));
   6913   }
   6914 }
   6915 
   6916 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
   6917   Load32BitValue(dest, bit_cast<int32_t, float>(value));
   6918 }
   6919 
   6920 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
   6921   Load64BitValue(dest, bit_cast<int64_t, double>(value));
   6922 }
   6923 
   6924 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
   6925   if (value == 0) {
   6926     __ testl(dest, dest);
   6927   } else {
   6928     __ cmpl(dest, Immediate(value));
   6929   }
   6930 }
   6931 
   6932 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
   6933   if (IsInt<32>(value)) {
   6934     if (value == 0) {
   6935       __ testq(dest, dest);
   6936     } else {
   6937       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
   6938     }
   6939   } else {
    6940     // Value won't fit in a 32-bit immediate; compare against the literal in the constant area.
   6941     __ cmpq(dest, LiteralInt64Address(value));
   6942   }
   6943 }
   6944 
   6945 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
   6946   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
   6947   GenerateIntCompare(lhs_reg, rhs);
   6948 }
   6949 
   6950 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
   6951   if (rhs.IsConstant()) {
   6952     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
   6953     Compare32BitValue(lhs, value);
   6954   } else if (rhs.IsStackSlot()) {
   6955     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   6956   } else {
   6957     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
   6958   }
   6959 }
   6960 
   6961 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
   6962   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
   6963   if (rhs.IsConstant()) {
   6964     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
   6965     Compare64BitValue(lhs_reg, value);
   6966   } else if (rhs.IsDoubleStackSlot()) {
   6967     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   6968   } else {
   6969     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
   6970   }
   6971 }
   6972 
   6973 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
   6974                                           Location index,
   6975                                           ScaleFactor scale,
   6976                                           uint32_t data_offset) {
   6977   return index.IsConstant() ?
   6978       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
   6979       Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
   6980 }
   6981 
   6982 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
   6983   DCHECK(dest.IsDoubleStackSlot());
   6984   if (IsInt<32>(value)) {
   6985     // Can move directly as an int32 constant.
   6986     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
   6987             Immediate(static_cast<int32_t>(value)));
   6988   } else {
   6989     Load64BitValue(CpuRegister(TMP), value);
   6990     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
   6991   }
   6992 }
   6993 
   6994 /**
   6995  * Class to handle late fixup of offsets into constant area.
   6996  */
   6997 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
   6998  public:
   6999   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
   7000       : codegen_(&codegen), offset_into_constant_area_(offset) {}
   7001 
   7002  protected:
   7003   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
   7004 
   7005   CodeGeneratorX86_64* codegen_;
   7006 
   7007  private:
   7008   void Process(const MemoryRegion& region, int pos) OVERRIDE {
   7009     // Patch the correct offset for the instruction.  We use the address of the
   7010     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
   7011     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
   7012     int32_t relative_position = constant_offset - pos;
   7013 
   7014     // Patch in the right value.
   7015     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
   7016   }
   7017 
   7018   // Location in constant area that the fixup refers to.
   7019   size_t offset_into_constant_area_;
   7020 };
   7021 
   7022 /**
    7023  * Class to handle late fixup of offsets to a jump table that will be created in the
   7024  * constant area.
   7025  */
   7026 class JumpTableRIPFixup : public RIPFixup {
   7027  public:
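           // The constant area offset is not known yet, so pass a placeholder (-1);
           // CreateJumpTable() fixes it up via SetOffset() once the table is emitted.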
   7028   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
   7029       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
   7030 
   7031   void CreateJumpTable() {
   7032     X86_64Assembler* assembler = codegen_->GetAssembler();
   7033 
   7034     // Ensure that the reference to the jump table has the correct offset.
   7035     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
   7036     SetOffset(offset_in_constant_table);
   7037 
   7038     // Compute the offset from the start of the function to this jump table.
   7039     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
   7040 
    7041     // Populate the jump table with the correct target offsets.
   7042     int32_t num_entries = switch_instr_->GetNumEntries();
   7043     HBasicBlock* block = switch_instr_->GetBlock();
   7044     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
   7045     // The value that we want is the target offset - the position of the table.
   7046     for (int32_t i = 0; i < num_entries; i++) {
   7047       HBasicBlock* b = successors[i];
   7048       Label* l = codegen_->GetLabelOf(b);
   7049       DCHECK(l->IsBound());
   7050       int32_t offset_to_block = l->Position() - current_table_offset;
   7051       assembler->AppendInt32(offset_to_block);
   7052     }
   7053   }
   7054 
   7055  private:
   7056   const HPackedSwitch* switch_instr_;
   7057 };
   7058 
   7059 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   7060   // Generate the constant area if needed.
   7061   X86_64Assembler* assembler = GetAssembler();
   7062   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
   7063     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
   7064     assembler->Align(4, 0);
   7065     constant_area_start_ = assembler->CodeSize();
   7066 
   7067     // Populate any jump tables.
   7068     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
   7069       jump_table->CreateJumpTable();
   7070     }
   7071 
   7072     // And now add the constant area to the generated code.
   7073     assembler->AddConstantArea();
   7074   }
   7075 
   7076   // And finish up.
   7077   CodeGenerator::Finalize(allocator);
   7078 }
   7079 
   7080 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
   7081   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
   7082   return Address::RIP(fixup);
   7083 }
   7084 
   7085 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
   7086   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
   7087   return Address::RIP(fixup);
   7088 }
   7089 
   7090 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
   7091   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
   7092   return Address::RIP(fixup);
   7093 }
   7094 
   7095 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
   7096   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
   7097   return Address::RIP(fixup);
   7098 }
   7099 
   7100 // TODO: trg as memory.
   7101 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
   7102   if (!trg.IsValid()) {
   7103     DCHECK_EQ(type, DataType::Type::kVoid);
   7104     return;
   7105   }
   7106 
   7107   DCHECK_NE(type, DataType::Type::kVoid);
   7108 
   7109   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
   7110   if (trg.Equals(return_loc)) {
   7111     return;
   7112   }
   7113 
   7114   // Let the parallel move resolver take care of all of this.
   7115   HParallelMove parallel_move(GetGraph()->GetAllocator());
   7116   parallel_move.AddMove(return_loc, trg, type, nullptr);
   7117   GetMoveResolver()->EmitNativeCode(&parallel_move);
   7118 }
   7119 
   7120 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
   7121   // Create a fixup to be used to create and address the jump table.
   7122   JumpTableRIPFixup* table_fixup =
   7123       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
   7124 
   7125   // We have to populate the jump tables.
   7126   fixups_to_jump_tables_.push_back(table_fixup);
   7127   return Address::RIP(table_fixup);
   7128 }
   7129 
   7130 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
   7131                                              const Address& addr_high,
   7132                                              int64_t v,
   7133                                              HInstruction* instruction) {
   7134   if (IsInt<32>(v)) {
   7135     int32_t v_32 = v;
   7136     __ movq(addr_low, Immediate(v_32));
   7137     MaybeRecordImplicitNullCheck(instruction);
   7138   } else {
    7139     // Doesn't fit in a sign-extended 32-bit immediate.  Store it in two 32-bit pieces.
   7140     int32_t low_v = Low32Bits(v);
   7141     int32_t high_v = High32Bits(v);
   7142     __ movl(addr_low, Immediate(low_v));
   7143     MaybeRecordImplicitNullCheck(instruction);
   7144     __ movl(addr_high, Immediate(high_v));
   7145   }
   7146 }
   7147 
   7148 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
   7149                                           const uint8_t* roots_data,
   7150                                           const PatchInfo<Label>& info,
   7151                                           uint64_t index_in_table) const {
   7152   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
   7153   uintptr_t address =
   7154       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
   7155   typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
   7156   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
   7157      dchecked_integral_cast<uint32_t>(address);
   7158 }
   7159 
   7160 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   7161   for (const PatchInfo<Label>& info : jit_string_patches_) {
   7162     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
   7163     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
   7164     PatchJitRootUse(code, roots_data, info, index_in_table);
   7165   }
   7166 
   7167   for (const PatchInfo<Label>& info : jit_class_patches_) {
   7168     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
   7169     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
   7170     PatchJitRootUse(code, roots_data, info, index_in_table);
   7171   }
   7172 }
   7173 
   7174 #undef __
   7175 
   7176 }  // namespace x86_64
   7177 }  // namespace art
   7178