      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "code_generator_x86_64.h"
     18 
     19 #include "art_method.h"
     20 #include "code_generator_utils.h"
     21 #include "compiled_method.h"
     22 #include "entrypoints/quick/quick_entrypoints.h"
     23 #include "gc/accounting/card_table.h"
     24 #include "intrinsics.h"
     25 #include "intrinsics_x86_64.h"
     26 #include "mirror/array-inl.h"
     27 #include "mirror/class-inl.h"
     28 #include "mirror/object_reference.h"
     29 #include "thread.h"
     30 #include "utils/assembler.h"
     31 #include "utils/stack_checks.h"
     32 #include "utils/x86_64/assembler_x86_64.h"
     33 #include "utils/x86_64/managed_register_x86_64.h"
     34 
     35 namespace art {
     36 
     37 template<class MirrorType>
     38 class GcRoot;
     39 
     40 namespace x86_64 {
     41 
     42 static constexpr int kCurrentMethodStackOffset = 0;
     43 static constexpr Register kMethodRegisterArgument = RDI;
      44 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
      45 // table version generates 7 instructions and num_entries literals. The compare/jump
      46 // sequence generates less code/data for a small num_entries.
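         // For example, at the threshold of 5 entries the compare/jump sequence costs roughly
         // 1.5 * 5 ~= 8 instructions, while a jump table costs 7 instructions plus 5 table
         // entries, so the constant below is the approximate break-even point.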
     47 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
     48 
     49 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
     50 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
     51 
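         // Mask for the C2 condition flag (bit 10) of the x87 FPU status word; while C2 is set
         // after an fprem, the partial-remainder computation has to be repeated.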
     52 static constexpr int kC2ConditionMask = 0x400;
     53 
     54 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
     55 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
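         // For example, QUICK_ENTRY_POINT(pThrowNullPointer) yields the Thread-relative byte
         // offset of that quick entrypoint; InvokeRuntime below turns such an offset into a
         // gs:-segment-relative indirect call.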
     56 
     57 class NullCheckSlowPathX86_64 : public SlowPathCode {
     58  public:
     59   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
     60 
     61   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     62     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     63     __ Bind(GetEntryLabel());
     64     if (instruction_->CanThrowIntoCatchBlock()) {
     65       // Live registers will be restored in the catch block if caught.
     66       SaveLiveRegisters(codegen, instruction_->GetLocations());
     67     }
     68     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
     69                                   instruction_,
     70                                   instruction_->GetDexPc(),
     71                                   this);
     72     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
     73   }
     74 
     75   bool IsFatal() const OVERRIDE { return true; }
     76 
     77   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
     78 
     79  private:
     80   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
     81 };
     82 
     83 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
     84  public:
     85   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
     86 
     87   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     88     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     89     __ Bind(GetEntryLabel());
     90     if (instruction_->CanThrowIntoCatchBlock()) {
     91       // Live registers will be restored in the catch block if caught.
     92       SaveLiveRegisters(codegen, instruction_->GetLocations());
     93     }
     94     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
     95                                   instruction_,
     96                                   instruction_->GetDexPc(),
     97                                   this);
     98     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
     99   }
    100 
    101   bool IsFatal() const OVERRIDE { return true; }
    102 
    103   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
    104 
    105  private:
    106   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
    107 };
    108 
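         // Slow path for integer division/remainder when the divisor is -1. idiv would raise a
         // divide error on the INT_MIN / -1 (or LONG_MIN / -1) overflow case, so the expected
         // results (the negated dividend for div, 0 for rem) are materialized here instead.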
    109 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
    110  public:
    111   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
    112       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
    113 
    114   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    115     __ Bind(GetEntryLabel());
    116     if (type_ == Primitive::kPrimInt) {
    117       if (is_div_) {
    118         __ negl(cpu_reg_);
    119       } else {
    120         __ xorl(cpu_reg_, cpu_reg_);
    121       }
    122 
    123     } else {
    124       DCHECK_EQ(Primitive::kPrimLong, type_);
    125       if (is_div_) {
    126         __ negq(cpu_reg_);
    127       } else {
    128         __ xorl(cpu_reg_, cpu_reg_);
    129       }
    130     }
    131     __ jmp(GetExitLabel());
    132   }
    133 
    134   const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
    135 
    136  private:
    137   const CpuRegister cpu_reg_;
    138   const Primitive::Type type_;
    139   const bool is_div_;
    140   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
    141 };
    142 
    143 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
    144  public:
    145   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
    146       : SlowPathCode(instruction), successor_(successor) {}
    147 
    148   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    149     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    150     __ Bind(GetEntryLabel());
    151     SaveLiveRegisters(codegen, instruction_->GetLocations());
    152     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
    153                                   instruction_,
    154                                   instruction_->GetDexPc(),
    155                                   this);
    156     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    157     RestoreLiveRegisters(codegen, instruction_->GetLocations());
    158     if (successor_ == nullptr) {
    159       __ jmp(GetReturnLabel());
    160     } else {
    161       __ jmp(x86_64_codegen->GetLabelOf(successor_));
    162     }
    163   }
    164 
    165   Label* GetReturnLabel() {
    166     DCHECK(successor_ == nullptr);
    167     return &return_label_;
    168   }
    169 
    170   HBasicBlock* GetSuccessor() const {
    171     return successor_;
    172   }
    173 
    174   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
    175 
    176  private:
    177   HBasicBlock* const successor_;
    178   Label return_label_;
    179 
    180   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
    181 };
    182 
    183 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
    184  public:
    185   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
    186     : SlowPathCode(instruction) {}
    187 
    188   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    189     LocationSummary* locations = instruction_->GetLocations();
    190     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    191     __ Bind(GetEntryLabel());
    192     if (instruction_->CanThrowIntoCatchBlock()) {
    193       // Live registers will be restored in the catch block if caught.
    194       SaveLiveRegisters(codegen, instruction_->GetLocations());
    195     }
    196     // We're moving two locations to locations that could overlap, so we need a parallel
    197     // move resolver.
    198     InvokeRuntimeCallingConvention calling_convention;
    199     codegen->EmitParallelMoves(
    200         locations->InAt(0),
    201         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    202         Primitive::kPrimInt,
    203         locations->InAt(1),
    204         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    205         Primitive::kPrimInt);
    206     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
    207                                   instruction_,
    208                                   instruction_->GetDexPc(),
    209                                   this);
    210     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
    211   }
    212 
    213   bool IsFatal() const OVERRIDE { return true; }
    214 
    215   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
    216 
    217  private:
    218   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
    219 };
    220 
    221 class LoadClassSlowPathX86_64 : public SlowPathCode {
    222  public:
    223   LoadClassSlowPathX86_64(HLoadClass* cls,
    224                           HInstruction* at,
    225                           uint32_t dex_pc,
    226                           bool do_clinit)
    227       : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    228     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    229   }
    230 
    231   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    232     LocationSummary* locations = at_->GetLocations();
    233     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    234     __ Bind(GetEntryLabel());
    235 
    236     SaveLiveRegisters(codegen, locations);
    237 
    238     InvokeRuntimeCallingConvention calling_convention;
    239     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
    240     x86_64_codegen->InvokeRuntime(do_clinit_ ?
    241                                       QUICK_ENTRY_POINT(pInitializeStaticStorage) :
    242                                       QUICK_ENTRY_POINT(pInitializeType),
    243                                   at_,
    244                                   dex_pc_,
    245                                   this);
    246     if (do_clinit_) {
    247       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    248     } else {
    249       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    250     }
    251 
    252     Location out = locations->Out();
    253     // Move the class to the desired location.
    254     if (out.IsValid()) {
    255       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
    256       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    257     }
    258 
    259     RestoreLiveRegisters(codegen, locations);
    260     __ jmp(GetExitLabel());
    261   }
    262 
    263   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
    264 
    265  private:
    266   // The class this slow path will load.
    267   HLoadClass* const cls_;
    268 
    269   // The instruction where this slow path is happening.
    270   // (Might be the load class or an initialization check).
    271   HInstruction* const at_;
    272 
    273   // The dex PC of `at_`.
    274   const uint32_t dex_pc_;
    275 
    276   // Whether to initialize the class.
    277   const bool do_clinit_;
    278 
    279   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
    280 };
    281 
    282 class LoadStringSlowPathX86_64 : public SlowPathCode {
    283  public:
    284   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
    285 
    286   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    287     LocationSummary* locations = instruction_->GetLocations();
    288     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    289 
    290     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    291     __ Bind(GetEntryLabel());
    292     SaveLiveRegisters(codegen, locations);
    293 
    294     InvokeRuntimeCallingConvention calling_convention;
    295     const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
    296     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
    297     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
    298                                   instruction_,
    299                                   instruction_->GetDexPc(),
    300                                   this);
    301     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    302     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    303     RestoreLiveRegisters(codegen, locations);
    304     __ jmp(GetExitLabel());
    305   }
    306 
    307   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
    308 
    309  private:
    310   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
    311 };
    312 
    313 class TypeCheckSlowPathX86_64 : public SlowPathCode {
    314  public:
    315   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
    316       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
    317 
    318   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    319     LocationSummary* locations = instruction_->GetLocations();
    320     Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
    321                                                         : locations->Out();
    322     uint32_t dex_pc = instruction_->GetDexPc();
    323     DCHECK(instruction_->IsCheckCast()
    324            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    325 
    326     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    327     __ Bind(GetEntryLabel());
    328 
    329     if (!is_fatal_) {
    330       SaveLiveRegisters(codegen, locations);
    331     }
    332 
    333     // We're moving two locations to locations that could overlap, so we need a parallel
    334     // move resolver.
    335     InvokeRuntimeCallingConvention calling_convention;
    336     codegen->EmitParallelMoves(
    337         locations->InAt(1),
    338         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    339         Primitive::kPrimNot,
    340         object_class,
    341         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    342         Primitive::kPrimNot);
    343 
    344     if (instruction_->IsInstanceOf()) {
    345       x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
    346                                     instruction_,
    347                                     dex_pc,
    348                                     this);
    349       CheckEntrypointTypes<
    350           kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
    351     } else {
    352       DCHECK(instruction_->IsCheckCast());
    353       x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
    354                                     instruction_,
    355                                     dex_pc,
    356                                     this);
    357       CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
    358     }
    359 
    360     if (!is_fatal_) {
    361       if (instruction_->IsInstanceOf()) {
    362         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    363       }
    364 
    365       RestoreLiveRegisters(codegen, locations);
    366       __ jmp(GetExitLabel());
    367     }
    368   }
    369 
    370   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
    371 
    372   bool IsFatal() const OVERRIDE { return is_fatal_; }
    373 
    374  private:
    375   const bool is_fatal_;
    376 
    377   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
    378 };
    379 
    380 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
    381  public:
    382   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
    383       : SlowPathCode(instruction) {}
    384 
    385   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    386     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    387     __ Bind(GetEntryLabel());
    388     SaveLiveRegisters(codegen, instruction_->GetLocations());
    389     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
    390                                   instruction_,
    391                                   instruction_->GetDexPc(),
    392                                   this);
    393     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
    394   }
    395 
    396   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
    397 
    398  private:
    399   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
    400 };
    401 
    402 class ArraySetSlowPathX86_64 : public SlowPathCode {
    403  public:
    404   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
    405 
    406   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    407     LocationSummary* locations = instruction_->GetLocations();
    408     __ Bind(GetEntryLabel());
    409     SaveLiveRegisters(codegen, locations);
    410 
    411     InvokeRuntimeCallingConvention calling_convention;
    412     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    413     parallel_move.AddMove(
    414         locations->InAt(0),
    415         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    416         Primitive::kPrimNot,
    417         nullptr);
    418     parallel_move.AddMove(
    419         locations->InAt(1),
    420         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    421         Primitive::kPrimInt,
    422         nullptr);
    423     parallel_move.AddMove(
    424         locations->InAt(2),
    425         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
    426         Primitive::kPrimNot,
    427         nullptr);
    428     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    429 
    430     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    431     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
    432                                   instruction_,
    433                                   instruction_->GetDexPc(),
    434                                   this);
    435     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    436     RestoreLiveRegisters(codegen, locations);
    437     __ jmp(GetExitLabel());
    438   }
    439 
    440   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
    441 
    442  private:
    443   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
    444 };
    445 
    446 // Slow path marking an object during a read barrier.
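         // The pReadBarrierMark entrypoint takes the reference in the first argument register and
         // returns the marked (possibly forwarded) reference in RAX, which is then moved to `out_`.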
    447 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
    448  public:
    449   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
    450       : SlowPathCode(instruction), out_(out), obj_(obj) {
    451     DCHECK(kEmitCompilerReadBarrier);
    452   }
    453 
    454   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
    455 
    456   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    457     LocationSummary* locations = instruction_->GetLocations();
    458     Register reg_out = out_.AsRegister<Register>();
    459     DCHECK(locations->CanCall());
    460     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    461     DCHECK(instruction_->IsInstanceFieldGet() ||
    462            instruction_->IsStaticFieldGet() ||
    463            instruction_->IsArrayGet() ||
    464            instruction_->IsLoadClass() ||
    465            instruction_->IsLoadString() ||
    466            instruction_->IsInstanceOf() ||
    467            instruction_->IsCheckCast())
    468         << "Unexpected instruction in read barrier marking slow path: "
    469         << instruction_->DebugName();
    470 
    471     __ Bind(GetEntryLabel());
    472     SaveLiveRegisters(codegen, locations);
    473 
    474     InvokeRuntimeCallingConvention calling_convention;
    475     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    476     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
    477     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
    478                                instruction_,
    479                                instruction_->GetDexPc(),
    480                                this);
    481     CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
    482     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
    483 
    484     RestoreLiveRegisters(codegen, locations);
    485     __ jmp(GetExitLabel());
    486   }
    487 
    488  private:
    489   const Location out_;
    490   const Location obj_;
    491 
    492   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
    493 };
    494 
    495 // Slow path generating a read barrier for a heap reference.
    496 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
    497  public:
    498   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
    499                                             Location out,
    500                                             Location ref,
    501                                             Location obj,
    502                                             uint32_t offset,
    503                                             Location index)
    504       : SlowPathCode(instruction),
    505         out_(out),
    506         ref_(ref),
    507         obj_(obj),
    508         offset_(offset),
    509         index_(index) {
    510     DCHECK(kEmitCompilerReadBarrier);
    511     // If `obj` is equal to `out` or `ref`, it means the initial
    512     // object has been overwritten by (or after) the heap object
    513     // reference load to be instrumented, e.g.:
    514     //
    515     //   __ movl(out, Address(out, offset));
    516     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    517     //
    518     // In that case, we have lost the information about the original
    519     // object, and the emitted read barrier cannot work properly.
    520     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    521     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
     522   }
    523 
    524   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    525     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    526     LocationSummary* locations = instruction_->GetLocations();
    527     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    528     DCHECK(locations->CanCall());
    529     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    530     DCHECK(!instruction_->IsInvoke() ||
    531            (instruction_->IsInvokeStaticOrDirect() &&
    532             instruction_->GetLocations()->Intrinsified()))
    533         << "Unexpected instruction in read barrier for heap reference slow path: "
    534         << instruction_->DebugName();
    535 
    536     __ Bind(GetEntryLabel());
    537     SaveLiveRegisters(codegen, locations);
    538 
    539     // We may have to change the index's value, but as `index_` is a
    540     // constant member (like other "inputs" of this slow path),
    541     // introduce a copy of it, `index`.
    542     Location index = index_;
    543     if (index_.IsValid()) {
    544       // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
    545       if (instruction_->IsArrayGet()) {
     546         // Compute the actual offset and store it in `index` (a mutable copy of `index_`).
    547         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
    548         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
    549         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
    550           // We are about to change the value of `index_reg` (see the
    551           // calls to art::x86_64::X86_64Assembler::shll and
    552           // art::x86_64::X86_64Assembler::AddImmediate below), but it
    553           // has not been saved by the previous call to
    554           // art::SlowPathCode::SaveLiveRegisters, as it is a
    555           // callee-save register --
    556           // art::SlowPathCode::SaveLiveRegisters does not consider
    557           // callee-save registers, as it has been designed with the
    558           // assumption that callee-save registers are supposed to be
    559           // handled by the called function.  So, as a callee-save
    560           // register, `index_reg` _would_ eventually be saved onto
    561           // the stack, but it would be too late: we would have
    562           // changed its value earlier.  Therefore, we manually save
    563           // it here into another freely available register,
    564           // `free_reg`, chosen of course among the caller-save
    565           // registers (as a callee-save `free_reg` register would
    566           // exhibit the same problem).
    567           //
    568           // Note we could have requested a temporary register from
    569           // the register allocator instead; but we prefer not to, as
    570           // this is a slow path, and we know we can find a
    571           // caller-save register that is available.
    572           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
    573           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
    574           index_reg = free_reg;
    575           index = Location::RegisterLocation(index_reg);
    576         } else {
    577           // The initial register stored in `index_` has already been
    578           // saved in the call to art::SlowPathCode::SaveLiveRegisters
    579           // (as it is not a callee-save register), so we can freely
    580           // use it.
    581         }
    582         // Shifting the index value contained in `index_reg` by the
    583         // scale factor (2) cannot overflow in practice, as the
    584         // runtime is unable to allocate object arrays with a size
    585         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
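                 // Concretely, at most 2^26 - 1 elements * 4 bytes per reference = 2^28 - 4 bytes,
                 // so the shifted index plus the data offset still fits comfortably in 32 bits.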
    586         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
    587         static_assert(
    588             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
    589             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
    590         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
    591       } else {
    592         DCHECK(instruction_->IsInvoke());
    593         DCHECK(instruction_->GetLocations()->Intrinsified());
    594         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
    595                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
    596             << instruction_->AsInvoke()->GetIntrinsic();
    597         DCHECK_EQ(offset_, 0U);
    598         DCHECK(index_.IsRegister());
    599       }
    600     }
    601 
    602     // We're moving two or three locations to locations that could
    603     // overlap, so we need a parallel move resolver.
    604     InvokeRuntimeCallingConvention calling_convention;
    605     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    606     parallel_move.AddMove(ref_,
    607                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
    608                           Primitive::kPrimNot,
    609                           nullptr);
    610     parallel_move.AddMove(obj_,
    611                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
    612                           Primitive::kPrimNot,
    613                           nullptr);
    614     if (index.IsValid()) {
    615       parallel_move.AddMove(index,
    616                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
    617                             Primitive::kPrimInt,
    618                             nullptr);
    619       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    620     } else {
    621       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    622       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    623     }
    624     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
    625                                   instruction_,
    626                                   instruction_->GetDexPc(),
    627                                   this);
    628     CheckEntrypointTypes<
    629         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    630     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
    631 
    632     RestoreLiveRegisters(codegen, locations);
    633     __ jmp(GetExitLabel());
    634   }
    635 
    636   const char* GetDescription() const OVERRIDE {
    637     return "ReadBarrierForHeapReferenceSlowPathX86_64";
    638   }
    639 
    640  private:
    641   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    642     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    643     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    644     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
    645       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
    646         return static_cast<CpuRegister>(i);
    647       }
    648     }
    649     // We shall never fail to find a free caller-save register, as
    650     // there are more than two core caller-save registers on x86-64
    651     // (meaning it is possible to find one which is different from
    652     // `ref` and `obj`).
    653     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    654     LOG(FATAL) << "Could not find a free caller-save register";
    655     UNREACHABLE();
    656   }
    657 
    658   const Location out_;
    659   const Location ref_;
    660   const Location obj_;
    661   const uint32_t offset_;
    662   // An additional location containing an index to an array.
    663   // Only used for HArrayGet and the UnsafeGetObject &
    664   // UnsafeGetObjectVolatile intrinsics.
    665   const Location index_;
    666 
    667   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
    668 };
    669 
    670 // Slow path generating a read barrier for a GC root.
    671 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
    672  public:
    673   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
    674       : SlowPathCode(instruction), out_(out), root_(root) {
    675     DCHECK(kEmitCompilerReadBarrier);
    676   }
    677 
    678   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    679     LocationSummary* locations = instruction_->GetLocations();
    680     DCHECK(locations->CanCall());
    681     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    682     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
    683         << "Unexpected instruction in read barrier for GC root slow path: "
    684         << instruction_->DebugName();
    685 
    686     __ Bind(GetEntryLabel());
    687     SaveLiveRegisters(codegen, locations);
    688 
    689     InvokeRuntimeCallingConvention calling_convention;
    690     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    691     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    692     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
    693                                   instruction_,
    694                                   instruction_->GetDexPc(),
    695                                   this);
    696     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    697     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
    698 
    699     RestoreLiveRegisters(codegen, locations);
    700     __ jmp(GetExitLabel());
    701   }
    702 
    703   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
    704 
    705  private:
    706   const Location out_;
    707   const Location root_;
    708 
    709   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
    710 };
    711 
    712 #undef __
    713 #define __ down_cast<X86_64Assembler*>(GetAssembler())->
    714 
    715 inline Condition X86_64IntegerCondition(IfCondition cond) {
    716   switch (cond) {
    717     case kCondEQ: return kEqual;
    718     case kCondNE: return kNotEqual;
    719     case kCondLT: return kLess;
    720     case kCondLE: return kLessEqual;
    721     case kCondGT: return kGreater;
    722     case kCondGE: return kGreaterEqual;
    723     case kCondB:  return kBelow;
    724     case kCondBE: return kBelowEqual;
    725     case kCondA:  return kAbove;
    726     case kCondAE: return kAboveEqual;
    727   }
    728   LOG(FATAL) << "Unreachable";
    729   UNREACHABLE();
    730 }
    731 
    732 // Maps FP condition to x86_64 name.
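         // Note the unsigned-flavored conditions (below/above): ucomiss/ucomisd report their
         // result through CF/ZF rather than the signed SF/OF flags.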
    733 inline Condition X86_64FPCondition(IfCondition cond) {
    734   switch (cond) {
    735     case kCondEQ: return kEqual;
    736     case kCondNE: return kNotEqual;
    737     case kCondLT: return kBelow;
    738     case kCondLE: return kBelowEqual;
    739     case kCondGT: return kAbove;
    740     case kCondGE: return kAboveEqual;
    741     default:      break;  // should not happen
    742   };
    743   LOG(FATAL) << "Unreachable";
    744   UNREACHABLE();
    745 }
    746 
    747 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    748       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    749       MethodReference target_method ATTRIBUTE_UNUSED) {
    750   switch (desired_dispatch_info.code_ptr_location) {
    751     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
    752     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
    753       // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
    754       return HInvokeStaticOrDirect::DispatchInfo {
    755         desired_dispatch_info.method_load_kind,
    756         HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
    757         desired_dispatch_info.method_load_data,
    758         0u
    759       };
    760     default:
    761       return desired_dispatch_info;
    762   }
    763 }
    764 
    765 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
    766                                                      Location temp) {
    767   // All registers are assumed to be correctly set up.
    768 
    769   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
    770   switch (invoke->GetMethodLoadKind()) {
    771     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
    772       // temp = thread->string_init_entrypoint
    773       __ gs()->movq(temp.AsRegister<CpuRegister>(),
    774                     Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
    775       break;
    776     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
    777       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
    778       break;
    779     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
    780       __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
    781       break;
    782     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
    783       __ movl(temp.AsRegister<CpuRegister>(), Immediate(0));  // Placeholder.
    784       method_patches_.emplace_back(invoke->GetTargetMethod());
    785       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
    786       break;
    787     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
    788       __ movq(temp.AsRegister<CpuRegister>(),
    789               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
     790       // Bind a new fixup label at the end of the "movq" insn.
    791       uint32_t offset = invoke->GetDexCacheArrayOffset();
    792       __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
    793       break;
    794     }
    795     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
    796       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
    797       Register method_reg;
    798       CpuRegister reg = temp.AsRegister<CpuRegister>();
    799       if (current_method.IsRegister()) {
    800         method_reg = current_method.AsRegister<Register>();
    801       } else {
    802         DCHECK(invoke->GetLocations()->Intrinsified());
    803         DCHECK(!current_method.IsValid());
    804         method_reg = reg.AsRegister();
    805         __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
    806       }
    807       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
    808       __ movq(reg,
    809               Address(CpuRegister(method_reg),
    810                       ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
    811       // temp = temp[index_in_cache];
    812       // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
    813       uint32_t index_in_cache = invoke->GetDexMethodIndex();
    814       __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
    815       break;
    816     }
    817   }
    818 
    819   switch (invoke->GetCodePtrLocation()) {
    820     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
    821       __ call(&frame_entry_label_);
    822       break;
    823     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
    824       relative_call_patches_.emplace_back(invoke->GetTargetMethod());
    825       Label* label = &relative_call_patches_.back().label;
    826       __ call(label);  // Bind to the patch label, override at link time.
    827       __ Bind(label);  // Bind the label at the end of the "call" insn.
    828       break;
    829     }
    830     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
    831     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
    832       // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
    833       LOG(FATAL) << "Unsupported";
    834       UNREACHABLE();
    835     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
    836       // (callee_method + offset_of_quick_compiled_code)()
    837       __ call(Address(callee_method.AsRegister<CpuRegister>(),
    838                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
    839                           kX86_64WordSize).SizeValue()));
    840       break;
    841   }
    842 
    843   DCHECK(!IsLeafMethod());
    844 }
    845 
    846 void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
    847   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
    848   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
    849       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
    850 
    851   // Use the calling convention instead of the location of the receiver, as
    852   // intrinsics may have put the receiver in a different register. In the intrinsics
    853   // slow path, the arguments have been moved to the right place, so here we are
    854   // guaranteed that the receiver is the first register of the calling convention.
    855   InvokeDexCallingConvention calling_convention;
    856   Register receiver = calling_convention.GetRegisterAt(0);
    857 
    858   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
    859   // /* HeapReference<Class> */ temp = receiver->klass_
    860   __ movl(temp, Address(CpuRegister(receiver), class_offset));
    861   MaybeRecordImplicitNullCheck(invoke);
    862   // Instead of simply (possibly) unpoisoning `temp` here, we should
    863   // emit a read barrier for the previous class reference load.
    864   // However this is not required in practice, as this is an
    865   // intermediate/temporary reference and because the current
    866   // concurrent copying collector keeps the from-space memory
     867   // intact/accessible until the end of the marking phase (though a
     868   // future collector may not keep it that way).
    869   __ MaybeUnpoisonHeapReference(temp);
    870   // temp = temp->GetMethodAt(method_offset);
    871   __ movq(temp, Address(temp, method_offset));
    872   // call temp->GetEntryPoint();
    873   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
    874       kX86_64WordSize).SizeValue()));
    875 }
    876 
    877 void CodeGeneratorX86_64::RecordSimplePatch() {
    878   if (GetCompilerOptions().GetIncludePatchInformation()) {
    879     simple_patches_.emplace_back();
    880     __ Bind(&simple_patches_.back());
    881   }
    882 }
    883 
    884 void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
    885   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
    886   __ Bind(&string_patches_.back().label);
    887 }
    888 
    889 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
    890                                                             uint32_t element_offset) {
    891   // Add a patch entry and return the label.
    892   pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
    893   return &pc_relative_dex_cache_patches_.back().label;
    894 }
    895 
    896 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
    897   DCHECK(linker_patches->empty());
    898   size_t size =
    899       method_patches_.size() +
    900       relative_call_patches_.size() +
    901       pc_relative_dex_cache_patches_.size() +
    902       simple_patches_.size() +
    903       string_patches_.size();
    904   linker_patches->reserve(size);
     905   // The label points to the end of the "movl" insn, but the literal offset for a method
     906   // patch needs to point to the embedded constant, which occupies the last 4 bytes.
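         // The same adjustment works for every patch kind handled below, since each patched
         // instruction ends with a 4-byte immediate, relative offset or RIP-relative displacement.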
    907   constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
    908   for (const MethodPatchInfo<Label>& info : method_patches_) {
    909     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    910     linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
    911                                                        info.target_method.dex_file,
    912                                                        info.target_method.dex_method_index));
    913   }
    914   for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
    915     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    916     linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
    917                                                              info.target_method.dex_file,
    918                                                              info.target_method.dex_method_index));
    919   }
    920   for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
    921     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    922     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
    923                                                               &info.target_dex_file,
    924                                                               info.label.Position(),
    925                                                               info.element_offset));
    926   }
    927   for (const Label& label : simple_patches_) {
    928     uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    929     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
    930   }
    931   for (const StringPatchInfo<Label>& info : string_patches_) {
    932     // These are always PC-relative, see GetSupportedLoadStringKind().
    933     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    934     linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
    935                                                                &info.dex_file,
    936                                                                info.label.Position(),
    937                                                                info.string_index));
    938   }
    939 }
    940 
    941 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
    942   stream << Register(reg);
    943 }
    944 
    945 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
    946   stream << FloatRegister(reg);
    947 }
    948 
    949 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
    950   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
    951   return kX86_64WordSize;
    952 }
    953 
    954 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
    955   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
    956   return kX86_64WordSize;
    957 }
    958 
    959 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
    960   __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
    961   return kX86_64WordSize;
    962 }
    963 
    964 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
    965   __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
    966   return kX86_64WordSize;
    967 }
    968 
    969 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
    970                                         HInstruction* instruction,
    971                                         uint32_t dex_pc,
    972                                         SlowPathCode* slow_path) {
    973   InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
    974                 instruction,
    975                 dex_pc,
    976                 slow_path);
    977 }
    978 
    979 void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
    980                                         HInstruction* instruction,
    981                                         uint32_t dex_pc,
    982                                         SlowPathCode* slow_path) {
    983   ValidateInvokeRuntime(instruction, slow_path);
    984   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
    985   RecordPcInfo(instruction, dex_pc, slow_path);
    986 }
    987 
    988 static constexpr int kNumberOfCpuRegisterPairs = 0;
    989 // Use a fake return address register to mimic Quick.
    990 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
    991 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
    992                                          const X86_64InstructionSetFeatures& isa_features,
    993                                          const CompilerOptions& compiler_options,
    994                                          OptimizingCompilerStats* stats)
    995       : CodeGenerator(graph,
    996                       kNumberOfCpuRegisters,
    997                       kNumberOfFloatRegisters,
    998                       kNumberOfCpuRegisterPairs,
    999                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
   1000                                           arraysize(kCoreCalleeSaves))
   1001                           | (1 << kFakeReturnRegister),
   1002                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
   1003                                           arraysize(kFpuCalleeSaves)),
   1004                       compiler_options,
   1005                       stats),
   1006         block_labels_(nullptr),
   1007         location_builder_(graph, this),
   1008         instruction_visitor_(graph, this),
   1009         move_resolver_(graph->GetArena(), this),
   1010         assembler_(graph->GetArena()),
   1011         isa_features_(isa_features),
   1012         constant_area_start_(0),
   1013         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1014         relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1015         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1016         simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1017         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
   1018         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   1019   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
   1020 }
   1021 
   1022 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
   1023                                                                CodeGeneratorX86_64* codegen)
   1024       : InstructionCodeGenerator(graph, codegen),
   1025         assembler_(codegen->GetAssembler()),
   1026         codegen_(codegen) {}
   1027 
   1028 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
   1029   // Stack register is always reserved.
   1030   blocked_core_registers_[RSP] = true;
   1031 
   1032   // Block the register used as TMP.
   1033   blocked_core_registers_[TMP] = true;
   1034 }
   1035 
   1036 static dwarf::Reg DWARFReg(Register reg) {
   1037   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
   1038 }
   1039 
   1040 static dwarf::Reg DWARFReg(FloatRegister reg) {
   1041   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
   1042 }
   1043 
   1044 void CodeGeneratorX86_64::GenerateFrameEntry() {
   1045   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
   1046   __ Bind(&frame_entry_label_);
   1047   bool skip_overflow_check = IsLeafMethod()
   1048       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
   1049   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
   1050 
   1051   if (!skip_overflow_check) {
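             // Probe the stack at RSP minus the reserved overflow gap. If the stack has
             // overflowed, this load touches the protected guard area and faults, and the fault
             // handler turns the fault into a StackOverflowError (the "implicit" overflow check).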
   1052     __ testq(CpuRegister(RAX), Address(
   1053         CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
   1054     RecordPcInfo(nullptr, 0);
   1055   }
   1056 
   1057   if (HasEmptyFrame()) {
   1058     return;
   1059   }
   1060 
   1061   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
   1062     Register reg = kCoreCalleeSaves[i];
   1063     if (allocated_registers_.ContainsCoreRegister(reg)) {
   1064       __ pushq(CpuRegister(reg));
   1065       __ cfi().AdjustCFAOffset(kX86_64WordSize);
   1066       __ cfi().RelOffset(DWARFReg(reg), 0);
   1067     }
   1068   }
   1069 
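           // The pushq sequence above already accounts for the core callee-save spill bytes
           // (GetCoreSpillSize()), so only the remaining part of the frame is allocated here.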
   1070   int adjust = GetFrameSize() - GetCoreSpillSize();
   1071   __ subq(CpuRegister(RSP), Immediate(adjust));
   1072   __ cfi().AdjustCFAOffset(adjust);
   1073   uint32_t xmm_spill_location = GetFpuSpillStart();
   1074   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
   1075 
   1076   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
   1077     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
   1078       int offset = xmm_spill_location + (xmm_spill_slot_size * i);
   1079       __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
   1080       __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
   1081     }
   1082   }
   1083 
   1084   __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
   1085           CpuRegister(kMethodRegisterArgument));
   1086 }
   1087 
   1088 void CodeGeneratorX86_64::GenerateFrameExit() {
   1089   __ cfi().RememberState();
   1090   if (!HasEmptyFrame()) {
   1091     uint32_t xmm_spill_location = GetFpuSpillStart();
   1092     size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
   1093     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
   1094       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
   1095         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
   1096         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
   1097         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
   1098       }
   1099     }
   1100 
   1101     int adjust = GetFrameSize() - GetCoreSpillSize();
   1102     __ addq(CpuRegister(RSP), Immediate(adjust));
   1103     __ cfi().AdjustCFAOffset(-adjust);
   1104 
   1105     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
   1106       Register reg = kCoreCalleeSaves[i];
   1107       if (allocated_registers_.ContainsCoreRegister(reg)) {
   1108         __ popq(CpuRegister(reg));
   1109         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
   1110         __ cfi().Restore(DWARFReg(reg));
   1111       }
   1112     }
   1113   }
   1114   __ ret();
   1115   __ cfi().RestoreState();
   1116   __ cfi().DefCFAOffset(GetFrameSize());
   1117 }
   1118 
   1119 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
   1120   __ Bind(GetLabelOf(block));
   1121 }
   1122 
   1123 void CodeGeneratorX86_64::Move(Location destination, Location source) {
   1124   if (source.Equals(destination)) {
   1125     return;
   1126   }
   1127   if (destination.IsRegister()) {
   1128     CpuRegister dest = destination.AsRegister<CpuRegister>();
   1129     if (source.IsRegister()) {
   1130       __ movq(dest, source.AsRegister<CpuRegister>());
   1131     } else if (source.IsFpuRegister()) {
   1132       __ movd(dest, source.AsFpuRegister<XmmRegister>());
   1133     } else if (source.IsStackSlot()) {
   1134       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1135     } else if (source.IsConstant()) {
   1136       HConstant* constant = source.GetConstant();
   1137       if (constant->IsLongConstant()) {
   1138         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
   1139       } else {
   1140         Load32BitValue(dest, GetInt32ValueOf(constant));
   1141       }
   1142     } else {
   1143       DCHECK(source.IsDoubleStackSlot());
   1144       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1145     }
   1146   } else if (destination.IsFpuRegister()) {
   1147     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   1148     if (source.IsRegister()) {
   1149       __ movd(dest, source.AsRegister<CpuRegister>());
   1150     } else if (source.IsFpuRegister()) {
   1151       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
   1152     } else if (source.IsConstant()) {
   1153       HConstant* constant = source.GetConstant();
   1154       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
   1155       if (constant->IsFloatConstant()) {
   1156         Load32BitValue(dest, static_cast<int32_t>(value));
   1157       } else {
   1158         Load64BitValue(dest, value);
   1159       }
   1160     } else if (source.IsStackSlot()) {
   1161       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1162     } else {
   1163       DCHECK(source.IsDoubleStackSlot());
   1164       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
   1165     }
   1166   } else if (destination.IsStackSlot()) {
   1167     if (source.IsRegister()) {
   1168       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1169               source.AsRegister<CpuRegister>());
   1170     } else if (source.IsFpuRegister()) {
   1171       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1172                source.AsFpuRegister<XmmRegister>());
   1173     } else if (source.IsConstant()) {
   1174       HConstant* constant = source.GetConstant();
   1175       int32_t value = GetInt32ValueOf(constant);
   1176       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
   1177     } else {
   1178       DCHECK(source.IsStackSlot()) << source;
   1179       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   1180       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   1181     }
   1182   } else {
   1183     DCHECK(destination.IsDoubleStackSlot());
   1184     if (source.IsRegister()) {
   1185       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1186               source.AsRegister<CpuRegister>());
   1187     } else if (source.IsFpuRegister()) {
   1188       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
   1189                source.AsFpuRegister<XmmRegister>());
   1190     } else if (source.IsConstant()) {
   1191       HConstant* constant = source.GetConstant();
   1192       int64_t value;
   1193       if (constant->IsDoubleConstant()) {
   1194         value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
   1195       } else {
   1196         DCHECK(constant->IsLongConstant());
   1197         value = constant->AsLongConstant()->GetValue();
   1198       }
   1199       Store64BitValueToStack(destination, value);
   1200     } else {
   1201       DCHECK(source.IsDoubleStackSlot());
   1202       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   1203       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   1204     }
   1205   }
   1206 }
   1207 
   1208 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
   1209   DCHECK(location.IsRegister());
   1210   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
   1211 }
   1212 
   1213 void CodeGeneratorX86_64::MoveLocation(
   1214     Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
   1215   Move(dst, src);
   1216 }
   1217 
   1218 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
   1219   if (location.IsRegister()) {
   1220     locations->AddTemp(location);
   1221   } else {
   1222     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
   1223   }
   1224 }
   1225 
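         // Emit the control flow for a Goto or TryBoundary. On a loop back edge with
         // a suspend check, the suspend check performs the branch; otherwise the jump
         // is elided when the successor is the next block in the code layout.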
   1226 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   1227   DCHECK(!successor->IsExitBlock());
   1228 
   1229   HBasicBlock* block = got->GetBlock();
   1230   HInstruction* previous = got->GetPrevious();
   1231 
   1232   HLoopInformation* info = block->GetLoopInformation();
   1233   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
   1234     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
   1235     return;
   1236   }
   1237 
   1238   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
   1239     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
   1240   }
   1241   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
   1242     __ jmp(codegen_->GetLabelOf(successor));
   1243   }
   1244 }
   1245 
   1246 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
   1247   got->SetLocations(nullptr);
   1248 }
   1249 
   1250 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
   1251   HandleGoto(got, got->GetSuccessor());
   1252 }
   1253 
   1254 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
   1255   try_boundary->SetLocations(nullptr);
   1256 }
   1257 
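         // A TryBoundary is only a control-flow marker: branch to its normal-flow
         // successor like a Goto, but emit nothing when that successor is the exit
         // block.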
   1258 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
   1259   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
   1260   if (!successor->IsExitBlock()) {
   1261     HandleGoto(try_boundary, successor);
   1262   }
   1263 }
   1264 
   1265 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
   1266   exit->SetLocations(nullptr);
   1267 }
   1268 
   1269 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
   1270 }
   1271 
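         // Emit the jumps that follow a ucomiss/ucomisd: an unordered result (NaN
         // operand) goes directly to the true or false target depending on the
         // condition's NaN semantics, and the ordered case then branches on the
         // regular FP condition code.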
   1272 template<class LabelType>
   1273 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
   1274                                                      LabelType* true_label,
   1275                                                      LabelType* false_label) {
   1276   if (cond->IsFPConditionTrueIfNaN()) {
   1277     __ j(kUnordered, true_label);
   1278   } else if (cond->IsFPConditionFalseIfNaN()) {
   1279     __ j(kUnordered, false_label);
   1280   }
   1281   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
   1282 }
   1283 
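         // Set EFLAGS for `condition` without branching: test/cmp for int-like
         // inputs, cmpq for longs, ucomiss/ucomisd for floats and doubles. The
         // right-hand side may be a register, a stack slot or a constant.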
   1284 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
   1285   LocationSummary* locations = condition->GetLocations();
   1286 
   1287   Location left = locations->InAt(0);
   1288   Location right = locations->InAt(1);
   1289   Primitive::Type type = condition->InputAt(0)->GetType();
   1290   switch (type) {
   1291     case Primitive::kPrimBoolean:
   1292     case Primitive::kPrimByte:
   1293     case Primitive::kPrimChar:
   1294     case Primitive::kPrimShort:
   1295     case Primitive::kPrimInt:
   1296     case Primitive::kPrimNot: {
   1297       CpuRegister left_reg = left.AsRegister<CpuRegister>();
   1298       if (right.IsConstant()) {
   1299         int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
   1300         if (value == 0) {
   1301           __ testl(left_reg, left_reg);
   1302         } else {
   1303           __ cmpl(left_reg, Immediate(value));
   1304         }
   1305       } else if (right.IsStackSlot()) {
   1306         __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   1307       } else {
   1308         __ cmpl(left_reg, right.AsRegister<CpuRegister>());
   1309       }
   1310       break;
   1311     }
   1312     case Primitive::kPrimLong: {
   1313       CpuRegister left_reg = left.AsRegister<CpuRegister>();
   1314       if (right.IsConstant()) {
   1315         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
   1316         codegen_->Compare64BitValue(left_reg, value);
   1317       } else if (right.IsDoubleStackSlot()) {
   1318         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   1319       } else {
   1320         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
   1321       }
   1322       break;
   1323     }
   1324     case Primitive::kPrimFloat: {
   1325       if (right.IsFpuRegister()) {
   1326         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
   1327       } else if (right.IsConstant()) {
   1328         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
   1329                    codegen_->LiteralFloatAddress(
   1330                      right.GetConstant()->AsFloatConstant()->GetValue()));
   1331       } else {
   1332         DCHECK(right.IsStackSlot());
   1333         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
   1334                    Address(CpuRegister(RSP), right.GetStackIndex()));
   1335       }
   1336       break;
   1337     }
   1338     case Primitive::kPrimDouble: {
   1339       if (right.IsFpuRegister()) {
   1340         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
   1341       } else if (right.IsConstant()) {
   1342         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
   1343                    codegen_->LiteralDoubleAddress(
   1344                      right.GetConstant()->AsDoubleConstant()->GetValue()));
   1345       } else {
   1346         DCHECK(right.IsDoubleStackSlot());
   1347         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
   1348                    Address(CpuRegister(RSP), right.GetStackIndex()));
   1349       }
   1350       break;
   1351     }
   1352     default:
   1353       LOG(FATAL) << "Unexpected condition type " << type;
   1354   }
   1355 }
   1356 
   1357 template<class LabelType>
   1358 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
   1359                                                                   LabelType* true_target_in,
   1360                                                                   LabelType* false_target_in) {
   1361   // Generated branching requires both targets to be explicit. If either of the
    1362   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
   1363   LabelType fallthrough_target;
   1364   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
   1365   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
   1366 
   1367   // Generate the comparison to set the CC.
   1368   GenerateCompareTest(condition);
   1369 
   1370   // Now generate the correct jump(s).
   1371   Primitive::Type type = condition->InputAt(0)->GetType();
   1372   switch (type) {
   1373     case Primitive::kPrimLong: {
   1374       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
   1375       break;
   1376     }
   1377     case Primitive::kPrimFloat: {
   1378       GenerateFPJumps(condition, true_target, false_target);
   1379       break;
   1380     }
   1381     case Primitive::kPrimDouble: {
   1382       GenerateFPJumps(condition, true_target, false_target);
   1383       break;
   1384     }
   1385     default:
   1386       LOG(FATAL) << "Unexpected condition type " << type;
   1387   }
   1388 
   1389   if (false_target != &fallthrough_target) {
   1390     __ jmp(false_target);
   1391   }
   1392 
   1393   if (fallthrough_target.IsLinked()) {
   1394     __ Bind(&fallthrough_target);
   1395   }
   1396 }
   1397 
   1398 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
   1399   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
    1400   // are set only strictly before `branch`. We can't use the eflags on FP
    1401   // conditions if they are materialized due to the complex branching.
   1402   return cond->IsCondition() &&
   1403          cond->GetNext() == branch &&
   1404          !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
   1405 }
   1406 
   1407 template<class LabelType>
   1408 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
   1409                                                            size_t condition_input_index,
   1410                                                            LabelType* true_target,
   1411                                                            LabelType* false_target) {
   1412   HInstruction* cond = instruction->InputAt(condition_input_index);
   1413 
   1414   if (true_target == nullptr && false_target == nullptr) {
   1415     // Nothing to do. The code always falls through.
   1416     return;
   1417   } else if (cond->IsIntConstant()) {
   1418     // Constant condition, statically compared against "true" (integer value 1).
   1419     if (cond->AsIntConstant()->IsTrue()) {
   1420       if (true_target != nullptr) {
   1421         __ jmp(true_target);
   1422       }
   1423     } else {
   1424       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
   1425       if (false_target != nullptr) {
   1426         __ jmp(false_target);
   1427       }
   1428     }
   1429     return;
   1430   }
   1431 
   1432   // The following code generates these patterns:
   1433   //  (1) true_target == nullptr && false_target != nullptr
   1434   //        - opposite condition true => branch to false_target
   1435   //  (2) true_target != nullptr && false_target == nullptr
   1436   //        - condition true => branch to true_target
   1437   //  (3) true_target != nullptr && false_target != nullptr
   1438   //        - condition true => branch to true_target
   1439   //        - branch to false_target
   1440   if (IsBooleanValueOrMaterializedCondition(cond)) {
   1441     if (AreEflagsSetFrom(cond, instruction)) {
   1442       if (true_target == nullptr) {
   1443         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
   1444       } else {
   1445         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
   1446       }
   1447     } else {
   1448       // Materialized condition, compare against 0.
   1449       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
   1450       if (lhs.IsRegister()) {
   1451         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
   1452       } else {
   1453         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
   1454       }
   1455       if (true_target == nullptr) {
   1456         __ j(kEqual, false_target);
   1457       } else {
   1458         __ j(kNotEqual, true_target);
   1459       }
   1460     }
   1461   } else {
   1462     // Condition has not been materialized, use its inputs as the
   1463     // comparison and its condition as the branch condition.
   1464     HCondition* condition = cond->AsCondition();
   1465 
   1466     // If this is a long or FP comparison that has been folded into
   1467     // the HCondition, generate the comparison directly.
   1468     Primitive::Type type = condition->InputAt(0)->GetType();
   1469     if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
   1470       GenerateCompareTestAndBranch(condition, true_target, false_target);
   1471       return;
   1472     }
   1473 
   1474     Location lhs = condition->GetLocations()->InAt(0);
   1475     Location rhs = condition->GetLocations()->InAt(1);
   1476     if (rhs.IsRegister()) {
   1477       __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
   1478     } else if (rhs.IsConstant()) {
   1479       int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
   1480       codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
   1481     } else {
   1482       __ cmpl(lhs.AsRegister<CpuRegister>(),
   1483               Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1484     }
    1485     if (true_target == nullptr) {
   1486       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
   1487     } else {
   1488       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
   1489     }
   1490   }
   1491 
   1492   // If neither branch falls through (case 3), the conditional branch to `true_target`
   1493   // was already emitted (case 2) and we need to emit a jump to `false_target`.
   1494   if (true_target != nullptr && false_target != nullptr) {
   1495     __ jmp(false_target);
   1496   }
   1497 }
   1498 
   1499 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
   1500   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
   1501   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
   1502     locations->SetInAt(0, Location::Any());
   1503   }
   1504 }
   1505 
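         // A null target means the corresponding successor is the next block in the
         // layout, so GenerateTestAndBranch can fall through instead of jumping.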
   1506 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
   1507   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   1508   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
   1509   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
   1510       nullptr : codegen_->GetLabelOf(true_successor);
   1511   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
   1512       nullptr : codegen_->GetLabelOf(false_successor);
   1513   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
   1514 }
   1515 
   1516 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   1517   LocationSummary* locations = new (GetGraph()->GetArena())
   1518       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   1519   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
   1520     locations->SetInAt(0, Location::Any());
   1521   }
   1522 }
   1523 
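         // Branch to the deoptimization slow path when the condition holds;
         // otherwise simply fall through.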
   1524 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   1525   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
   1526   GenerateTestAndBranch<Label>(deoptimize,
   1527                                /* condition_input_index */ 0,
   1528                                slow_path->GetEntryLabel(),
   1529                                /* false_target */ nullptr);
   1530 }
   1531 
   1532 static bool SelectCanUseCMOV(HSelect* select) {
   1533   // There are no conditional move instructions for XMMs.
   1534   if (Primitive::IsFloatingPointType(select->GetType())) {
   1535     return false;
   1536   }
   1537 
    1538   // An FP condition doesn't generate the single CC that we need.
   1539   HInstruction* condition = select->GetCondition();
   1540   if (condition->IsCondition() &&
   1541       Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
   1542     return false;
   1543   }
   1544 
   1545   // We can generate a CMOV for this Select.
   1546   return true;
   1547 }
   1548 
   1549 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
   1550   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   1551   if (Primitive::IsFloatingPointType(select->GetType())) {
   1552     locations->SetInAt(0, Location::RequiresFpuRegister());
   1553     locations->SetInAt(1, Location::Any());
   1554   } else {
   1555     locations->SetInAt(0, Location::RequiresRegister());
   1556     if (SelectCanUseCMOV(select)) {
   1557       if (select->InputAt(1)->IsConstant()) {
   1558         locations->SetInAt(1, Location::RequiresRegister());
   1559       } else {
   1560         locations->SetInAt(1, Location::Any());
   1561       }
   1562     } else {
   1563       locations->SetInAt(1, Location::Any());
   1564     }
   1565   }
   1566   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
   1567     locations->SetInAt(2, Location::RequiresRegister());
   1568   }
   1569   locations->SetOut(Location::SameAsFirstInput());
   1570 }
   1571 
   1572 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
   1573   LocationSummary* locations = select->GetLocations();
   1574   if (SelectCanUseCMOV(select)) {
   1575     // If both the condition and the source types are integer, we can generate
   1576     // a CMOV to implement Select.
   1577     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
   1578     Location value_true_loc = locations->InAt(1);
   1579     DCHECK(locations->InAt(0).Equals(locations->Out()));
   1580 
   1581     HInstruction* select_condition = select->GetCondition();
   1582     Condition cond = kNotEqual;
   1583 
   1584     // Figure out how to test the 'condition'.
   1585     if (select_condition->IsCondition()) {
   1586       HCondition* condition = select_condition->AsCondition();
   1587       if (!condition->IsEmittedAtUseSite()) {
   1588         // This was a previously materialized condition.
   1589         // Can we use the existing condition code?
   1590         if (AreEflagsSetFrom(condition, select)) {
   1591           // Materialization was the previous instruction.  Condition codes are right.
   1592           cond = X86_64IntegerCondition(condition->GetCondition());
   1593         } else {
   1594           // No, we have to recreate the condition code.
   1595           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
   1596           __ testl(cond_reg, cond_reg);
   1597         }
   1598       } else {
   1599         GenerateCompareTest(condition);
   1600         cond = X86_64IntegerCondition(condition->GetCondition());
   1601       }
   1602     } else {
   1603       // Must be a boolean condition, which needs to be compared to 0.
   1604       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
   1605       __ testl(cond_reg, cond_reg);
   1606     }
   1607 
   1608     // If the condition is true, overwrite the output, which already contains false.
   1609     // Generate the correct sized CMOV.
   1610     bool is_64_bit = Primitive::Is64BitType(select->GetType());
   1611     if (value_true_loc.IsRegister()) {
   1612       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
   1613     } else {
   1614       __ cmov(cond,
   1615               value_false,
   1616               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
   1617     }
   1618   } else {
   1619     NearLabel false_target;
   1620     GenerateTestAndBranch<NearLabel>(select,
   1621                                      /* condition_input_index */ 2,
   1622                                      /* true_target */ nullptr,
   1623                                      &false_target);
   1624     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
   1625     __ Bind(&false_target);
   1626   }
   1627 }
   1628 
   1629 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   1630   new (GetGraph()->GetArena()) LocationSummary(info);
   1631 }
   1632 
   1633 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
   1634   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
   1635 }
   1636 
   1637 void CodeGeneratorX86_64::GenerateNop() {
   1638   __ nop();
   1639 }
   1640 
   1641 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   1642   LocationSummary* locations =
   1643       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   1644   // Handle the long/FP comparisons made in instruction simplification.
   1645   switch (cond->InputAt(0)->GetType()) {
   1646     case Primitive::kPrimLong:
   1647       locations->SetInAt(0, Location::RequiresRegister());
   1648       locations->SetInAt(1, Location::Any());
   1649       break;
   1650     case Primitive::kPrimFloat:
   1651     case Primitive::kPrimDouble:
   1652       locations->SetInAt(0, Location::RequiresFpuRegister());
   1653       locations->SetInAt(1, Location::Any());
   1654       break;
   1655     default:
   1656       locations->SetInAt(0, Location::RequiresRegister());
   1657       locations->SetInAt(1, Location::Any());
   1658       break;
   1659   }
   1660   if (!cond->IsEmittedAtUseSite()) {
   1661     locations->SetOut(Location::RequiresRegister());
   1662   }
   1663 }
   1664 
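         // Materialize a condition into its output register: int and long inputs use
         // cmp/test followed by setcc, while float and double inputs branch through
         // GenerateFPJumps and then load 0 or 1 into the output.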
   1665 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
   1666   if (cond->IsEmittedAtUseSite()) {
   1667     return;
   1668   }
   1669 
   1670   LocationSummary* locations = cond->GetLocations();
   1671   Location lhs = locations->InAt(0);
   1672   Location rhs = locations->InAt(1);
   1673   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
   1674   NearLabel true_label, false_label;
   1675 
   1676   switch (cond->InputAt(0)->GetType()) {
   1677     default:
   1678       // Integer case.
   1679 
   1680       // Clear output register: setcc only sets the low byte.
   1681       __ xorl(reg, reg);
   1682 
   1683       if (rhs.IsRegister()) {
   1684         __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
   1685       } else if (rhs.IsConstant()) {
   1686         int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
   1687         codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
   1688       } else {
   1689         __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1690       }
   1691       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
   1692       return;
   1693     case Primitive::kPrimLong:
   1694       // Clear output register: setcc only sets the low byte.
   1695       __ xorl(reg, reg);
   1696 
   1697       if (rhs.IsRegister()) {
   1698         __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
   1699       } else if (rhs.IsConstant()) {
   1700         int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
   1701         codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
   1702       } else {
   1703         __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1704       }
   1705       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
   1706       return;
   1707     case Primitive::kPrimFloat: {
   1708       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
   1709       if (rhs.IsConstant()) {
   1710         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
   1711         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
   1712       } else if (rhs.IsStackSlot()) {
   1713         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1714       } else {
   1715         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
   1716       }
   1717       GenerateFPJumps(cond, &true_label, &false_label);
   1718       break;
   1719     }
   1720     case Primitive::kPrimDouble: {
   1721       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
   1722       if (rhs.IsConstant()) {
   1723         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
   1724         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
   1725       } else if (rhs.IsDoubleStackSlot()) {
   1726         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
   1727       } else {
   1728         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
   1729       }
   1730       GenerateFPJumps(cond, &true_label, &false_label);
   1731       break;
   1732     }
   1733   }
   1734 
   1735   // Convert the jumps into the result.
   1736   NearLabel done_label;
   1737 
   1738   // False case: result = 0.
   1739   __ Bind(&false_label);
   1740   __ xorl(reg, reg);
   1741   __ jmp(&done_label);
   1742 
   1743   // True case: result = 1.
   1744   __ Bind(&true_label);
   1745   __ movl(reg, Immediate(1));
   1746   __ Bind(&done_label);
   1747 }
   1748 
   1749 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
   1750   HandleCondition(comp);
   1751 }
   1752 
   1753 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
   1754   HandleCondition(comp);
   1755 }
   1756 
   1757 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
   1758   HandleCondition(comp);
   1759 }
   1760 
   1761 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
   1762   HandleCondition(comp);
   1763 }
   1764 
   1765 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
   1766   HandleCondition(comp);
   1767 }
   1768 
   1769 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
   1770   HandleCondition(comp);
   1771 }
   1772 
   1773 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
   1774   HandleCondition(comp);
   1775 }
   1776 
   1777 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
   1778   HandleCondition(comp);
   1779 }
   1780 
   1781 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
   1782   HandleCondition(comp);
   1783 }
   1784 
   1785 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
   1786   HandleCondition(comp);
   1787 }
   1788 
   1789 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
   1790   HandleCondition(comp);
   1791 }
   1792 
   1793 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
   1794   HandleCondition(comp);
   1795 }
   1796 
   1797 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
   1798   HandleCondition(comp);
   1799 }
   1800 
   1801 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
   1802   HandleCondition(comp);
   1803 }
   1804 
   1805 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
   1806   HandleCondition(comp);
   1807 }
   1808 
   1809 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
   1810   HandleCondition(comp);
   1811 }
   1812 
   1813 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
   1814   HandleCondition(comp);
   1815 }
   1816 
   1817 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
   1818   HandleCondition(comp);
   1819 }
   1820 
   1821 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
   1822   HandleCondition(comp);
   1823 }
   1824 
   1825 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
   1826   HandleCondition(comp);
   1827 }
   1828 
   1829 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   1830   LocationSummary* locations =
   1831       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   1832   switch (compare->InputAt(0)->GetType()) {
   1833     case Primitive::kPrimBoolean:
   1834     case Primitive::kPrimByte:
   1835     case Primitive::kPrimShort:
   1836     case Primitive::kPrimChar:
   1837     case Primitive::kPrimInt:
   1838     case Primitive::kPrimLong: {
   1839       locations->SetInAt(0, Location::RequiresRegister());
   1840       locations->SetInAt(1, Location::Any());
   1841       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   1842       break;
   1843     }
   1844     case Primitive::kPrimFloat:
   1845     case Primitive::kPrimDouble: {
   1846       locations->SetInAt(0, Location::RequiresFpuRegister());
   1847       locations->SetInAt(1, Location::Any());
   1848       locations->SetOut(Location::RequiresRegister());
   1849       break;
   1850     }
   1851     default:
   1852       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
   1853   }
   1854 }
   1855 
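         // HCompare produces -1, 0 or 1. Integer and long inputs use a plain compare;
         // float and double inputs use ucomiss/ucomisd, sending the unordered (NaN)
         // case to "greater" or "less" according to the gt/lt bias and using kBelow
         // (CF) as the ordered "less" condition.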
   1856 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
   1857   LocationSummary* locations = compare->GetLocations();
   1858   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   1859   Location left = locations->InAt(0);
   1860   Location right = locations->InAt(1);
   1861 
   1862   NearLabel less, greater, done;
   1863   Primitive::Type type = compare->InputAt(0)->GetType();
   1864   Condition less_cond = kLess;
   1865 
   1866   switch (type) {
   1867     case Primitive::kPrimBoolean:
   1868     case Primitive::kPrimByte:
   1869     case Primitive::kPrimShort:
   1870     case Primitive::kPrimChar:
   1871     case Primitive::kPrimInt: {
   1872       CpuRegister left_reg = left.AsRegister<CpuRegister>();
   1873       if (right.IsConstant()) {
   1874         int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
   1875         codegen_->Compare32BitValue(left_reg, value);
   1876       } else if (right.IsStackSlot()) {
   1877         __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   1878       } else {
   1879         __ cmpl(left_reg, right.AsRegister<CpuRegister>());
   1880       }
   1881       break;
   1882     }
   1883     case Primitive::kPrimLong: {
   1884       CpuRegister left_reg = left.AsRegister<CpuRegister>();
   1885       if (right.IsConstant()) {
   1886         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
   1887         codegen_->Compare64BitValue(left_reg, value);
   1888       } else if (right.IsDoubleStackSlot()) {
   1889         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   1890       } else {
   1891         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
   1892       }
   1893       break;
   1894     }
   1895     case Primitive::kPrimFloat: {
   1896       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
   1897       if (right.IsConstant()) {
   1898         float value = right.GetConstant()->AsFloatConstant()->GetValue();
   1899         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
   1900       } else if (right.IsStackSlot()) {
   1901         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   1902       } else {
   1903         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
   1904       }
   1905       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
   1906       less_cond = kBelow;  //  ucomis{s,d} sets CF
   1907       break;
   1908     }
   1909     case Primitive::kPrimDouble: {
   1910       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
   1911       if (right.IsConstant()) {
   1912         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
   1913         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
   1914       } else if (right.IsDoubleStackSlot()) {
   1915         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
   1916       } else {
   1917         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
   1918       }
   1919       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
   1920       less_cond = kBelow;  //  ucomis{s,d} sets CF
   1921       break;
   1922     }
   1923     default:
   1924       LOG(FATAL) << "Unexpected compare type " << type;
   1925   }
   1926 
   1927   __ movl(out, Immediate(0));
   1928   __ j(kEqual, &done);
   1929   __ j(less_cond, &less);
   1930 
   1931   __ Bind(&greater);
   1932   __ movl(out, Immediate(1));
   1933   __ jmp(&done);
   1934 
   1935   __ Bind(&less);
   1936   __ movl(out, Immediate(-1));
   1937 
   1938   __ Bind(&done);
   1939 }
   1940 
   1941 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
   1942   LocationSummary* locations =
   1943       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   1944   locations->SetOut(Location::ConstantLocation(constant));
   1945 }
   1946 
   1947 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   1948   // Will be generated at use site.
   1949 }
   1950 
   1951 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
   1952   LocationSummary* locations =
   1953       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   1954   locations->SetOut(Location::ConstantLocation(constant));
   1955 }
   1956 
   1957 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   1958   // Will be generated at use site.
   1959 }
   1960 
   1961 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
   1962   LocationSummary* locations =
   1963       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   1964   locations->SetOut(Location::ConstantLocation(constant));
   1965 }
   1966 
   1967 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
   1968   // Will be generated at use site.
   1969 }
   1970 
   1971 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
   1972   LocationSummary* locations =
   1973       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   1974   locations->SetOut(Location::ConstantLocation(constant));
   1975 }
   1976 
   1977 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   1978   // Will be generated at use site.
   1979 }
   1980 
   1981 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
   1982   LocationSummary* locations =
   1983       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   1984   locations->SetOut(Location::ConstantLocation(constant));
   1985 }
   1986 
   1987 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
   1988     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   1989   // Will be generated at use site.
   1990 }
   1991 
   1992 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   1993   memory_barrier->SetLocations(nullptr);
   1994 }
   1995 
   1996 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   1997   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
   1998 }
   1999 
   2000 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
   2001   ret->SetLocations(nullptr);
   2002 }
   2003 
   2004 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
   2005   codegen_->GenerateFrameExit();
   2006 }
   2007 
   2008 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
   2009   LocationSummary* locations =
   2010       new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
   2011   switch (ret->InputAt(0)->GetType()) {
   2012     case Primitive::kPrimBoolean:
   2013     case Primitive::kPrimByte:
   2014     case Primitive::kPrimChar:
   2015     case Primitive::kPrimShort:
   2016     case Primitive::kPrimInt:
   2017     case Primitive::kPrimNot:
   2018     case Primitive::kPrimLong:
   2019       locations->SetInAt(0, Location::RegisterLocation(RAX));
   2020       break;
   2021 
   2022     case Primitive::kPrimFloat:
   2023     case Primitive::kPrimDouble:
   2024       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
   2025       break;
   2026 
   2027     default:
   2028       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
   2029   }
   2030 }
   2031 
   2032 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
   2033   if (kIsDebugBuild) {
   2034     switch (ret->InputAt(0)->GetType()) {
   2035       case Primitive::kPrimBoolean:
   2036       case Primitive::kPrimByte:
   2037       case Primitive::kPrimChar:
   2038       case Primitive::kPrimShort:
   2039       case Primitive::kPrimInt:
   2040       case Primitive::kPrimNot:
   2041       case Primitive::kPrimLong:
   2042         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
   2043         break;
   2044 
   2045       case Primitive::kPrimFloat:
   2046       case Primitive::kPrimDouble:
   2047         DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
   2048                   XMM0);
   2049         break;
   2050 
   2051       default:
   2052         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
   2053     }
   2054   }
   2055   codegen_->GenerateFrameExit();
   2056 }
   2057 
   2058 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
   2059   switch (type) {
   2060     case Primitive::kPrimBoolean:
   2061     case Primitive::kPrimByte:
   2062     case Primitive::kPrimChar:
   2063     case Primitive::kPrimShort:
   2064     case Primitive::kPrimInt:
   2065     case Primitive::kPrimNot:
   2066     case Primitive::kPrimLong:
   2067       return Location::RegisterLocation(RAX);
   2068 
   2069     case Primitive::kPrimVoid:
   2070       return Location::NoLocation();
   2071 
   2072     case Primitive::kPrimDouble:
   2073     case Primitive::kPrimFloat:
   2074       return Location::FpuRegisterLocation(XMM0);
   2075   }
   2076 
   2077   UNREACHABLE();
   2078 }
   2079 
   2080 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
   2081   return Location::RegisterLocation(kMethodRegisterArgument);
   2082 }
   2083 
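         // Assign the next argument of the given type to its location under the
         // x86-64 managed calling convention: GPR/XMM argument registers while they
         // last, then stack slots. Long and double arguments consume two stack-slot
         // indices.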
   2084 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
   2085   switch (type) {
   2086     case Primitive::kPrimBoolean:
   2087     case Primitive::kPrimByte:
   2088     case Primitive::kPrimChar:
   2089     case Primitive::kPrimShort:
   2090     case Primitive::kPrimInt:
   2091     case Primitive::kPrimNot: {
   2092       uint32_t index = gp_index_++;
   2093       stack_index_++;
   2094       if (index < calling_convention.GetNumberOfRegisters()) {
   2095         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
   2096       } else {
   2097         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
   2098       }
   2099     }
   2100 
   2101     case Primitive::kPrimLong: {
   2102       uint32_t index = gp_index_;
   2103       stack_index_ += 2;
   2104       if (index < calling_convention.GetNumberOfRegisters()) {
   2105         gp_index_ += 1;
   2106         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
   2107       } else {
   2108         gp_index_ += 2;
   2109         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
   2110       }
   2111     }
   2112 
   2113     case Primitive::kPrimFloat: {
   2114       uint32_t index = float_index_++;
   2115       stack_index_++;
   2116       if (index < calling_convention.GetNumberOfFpuRegisters()) {
   2117         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
   2118       } else {
   2119         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
   2120       }
   2121     }
   2122 
   2123     case Primitive::kPrimDouble: {
   2124       uint32_t index = float_index_++;
   2125       stack_index_ += 2;
   2126       if (index < calling_convention.GetNumberOfFpuRegisters()) {
   2127         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
   2128       } else {
   2129         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
   2130       }
   2131     }
   2132 
   2133     case Primitive::kPrimVoid:
   2134       LOG(FATAL) << "Unexpected parameter type " << type;
   2135       break;
   2136   }
   2137   return Location::NoLocation();
   2138 }
   2139 
   2140 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   2141   // The trampoline uses the same calling convention as dex calling conventions,
   2142   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
   2143   // the method_idx.
   2144   HandleInvoke(invoke);
   2145 }
   2146 
   2147 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   2148   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
   2149 }
   2150 
   2151 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   2152   // Explicit clinit checks triggered by static invokes must have been pruned by
   2153   // art::PrepareForRegisterAllocation.
   2154   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   2155 
   2156   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   2157   if (intrinsic.TryDispatch(invoke)) {
   2158     return;
   2159   }
   2160 
   2161   HandleInvoke(invoke);
   2162 }
   2163 
   2164 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
   2165   if (invoke->GetLocations()->Intrinsified()) {
   2166     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
   2167     intrinsic.Dispatch(invoke);
   2168     return true;
   2169   }
   2170   return false;
   2171 }
   2172 
   2173 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   2174   // Explicit clinit checks triggered by static invokes must have been pruned by
   2175   // art::PrepareForRegisterAllocation.
   2176   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
   2177 
   2178   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
   2179     return;
   2180   }
   2181 
   2182   LocationSummary* locations = invoke->GetLocations();
   2183   codegen_->GenerateStaticOrDirectCall(
   2184       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
   2185   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   2186 }
   2187 
   2188 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
   2189   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
   2190   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
   2191 }
   2192 
   2193 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   2194   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   2195   if (intrinsic.TryDispatch(invoke)) {
   2196     return;
   2197   }
   2198 
   2199   HandleInvoke(invoke);
   2200 }
   2201 
   2202 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   2203   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
   2204     return;
   2205   }
   2206 
   2207   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   2208   DCHECK(!codegen_->IsLeafMethod());
   2209   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   2210 }
   2211 
   2212 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   2213   HandleInvoke(invoke);
   2214   // Add the hidden argument.
   2215   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
   2216 }
   2217 
   2218 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   2219   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   2220   LocationSummary* locations = invoke->GetLocations();
   2221   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   2222   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
   2223   Location receiver = locations->InAt(0);
   2224   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
   2225 
    2226   // Set the hidden argument. This is safe to do here, as RAX
   2227   // won't be modified thereafter, before the `call` instruction.
   2228   DCHECK_EQ(RAX, hidden_reg.AsRegister());
   2229   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
   2230 
   2231   if (receiver.IsStackSlot()) {
   2232     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
   2233     // /* HeapReference<Class> */ temp = temp->klass_
   2234     __ movl(temp, Address(temp, class_offset));
   2235   } else {
   2236     // /* HeapReference<Class> */ temp = receiver->klass_
   2237     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   2238   }
   2239   codegen_->MaybeRecordImplicitNullCheck(invoke);
   2240   // Instead of simply (possibly) unpoisoning `temp` here, we should
   2241   // emit a read barrier for the previous class reference load.
   2242   // However this is not required in practice, as this is an
   2243   // intermediate/temporary reference and because the current
   2244   // concurrent copying collector keeps the from-space memory
   2245   // intact/accessible until the end of the marking phase (the
   2246   // concurrent copying collector may not in the future).
   2247   __ MaybeUnpoisonHeapReference(temp);
   2248   // temp = temp->GetAddressOfIMT()
   2249   __ movq(temp,
   2250       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
   2251   // temp = temp->GetImtEntryAt(method_offset);
   2252   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   2253       invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
   2255   __ movq(temp, Address(temp, method_offset));
   2256   // call temp->GetEntryPoint();
   2257   __ call(Address(temp,
   2258                   ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
   2259 
   2260   DCHECK(!codegen_->IsLeafMethod());
   2261   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   2262 }
   2263 
   2264 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
   2265   LocationSummary* locations =
   2266       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
   2267   switch (neg->GetResultType()) {
   2268     case Primitive::kPrimInt:
   2269     case Primitive::kPrimLong:
   2270       locations->SetInAt(0, Location::RequiresRegister());
   2271       locations->SetOut(Location::SameAsFirstInput());
   2272       break;
   2273 
   2274     case Primitive::kPrimFloat:
   2275     case Primitive::kPrimDouble:
   2276       locations->SetInAt(0, Location::RequiresFpuRegister());
   2277       locations->SetOut(Location::SameAsFirstInput());
   2278       locations->AddTemp(Location::RequiresFpuRegister());
   2279       break;
   2280 
   2281     default:
   2282       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
   2283   }
   2284 }
   2285 
   2286 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
   2287   LocationSummary* locations = neg->GetLocations();
   2288   Location out = locations->Out();
   2289   Location in = locations->InAt(0);
   2290   switch (neg->GetResultType()) {
   2291     case Primitive::kPrimInt:
   2292       DCHECK(in.IsRegister());
   2293       DCHECK(in.Equals(out));
   2294       __ negl(out.AsRegister<CpuRegister>());
   2295       break;
   2296 
   2297     case Primitive::kPrimLong:
   2298       DCHECK(in.IsRegister());
   2299       DCHECK(in.Equals(out));
   2300       __ negq(out.AsRegister<CpuRegister>());
   2301       break;
   2302 
   2303     case Primitive::kPrimFloat: {
   2304       DCHECK(in.Equals(out));
   2305       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2306       // Implement float negation with an exclusive or with value
   2307       // 0x80000000 (mask for bit 31, representing the sign of a
   2308       // single-precision floating-point number).
   2309       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
   2310       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
   2311       break;
   2312     }
   2313 
   2314     case Primitive::kPrimDouble: {
   2315       DCHECK(in.Equals(out));
   2316       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   2317       // Implement double negation with an exclusive or with value
   2318       // 0x8000000000000000 (mask for bit 63, representing the sign of
   2319       // a double-precision floating-point number).
   2320       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
   2321       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
   2322       break;
   2323     }
   2324 
   2325     default:
   2326       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
   2327   }
   2328 }
   2329 
   2330 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
   2331   LocationSummary* locations =
   2332       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   2333   Primitive::Type result_type = conversion->GetResultType();
   2334   Primitive::Type input_type = conversion->GetInputType();
   2335   DCHECK_NE(result_type, input_type);
   2336 
   2337   // The Java language does not allow treating boolean as an integral type but
   2338   // our bit representation makes it safe.
   2339 
   2340   switch (result_type) {
   2341     case Primitive::kPrimByte:
   2342       switch (input_type) {
   2343         case Primitive::kPrimLong:
   2344           // Type conversion from long to byte is a result of code transformations.
   2345         case Primitive::kPrimBoolean:
   2346           // Boolean input is a result of code transformations.
   2347         case Primitive::kPrimShort:
   2348         case Primitive::kPrimInt:
   2349         case Primitive::kPrimChar:
   2350           // Processing a Dex `int-to-byte' instruction.
   2351           locations->SetInAt(0, Location::Any());
   2352           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2353           break;
   2354 
   2355         default:
   2356           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2357                      << " to " << result_type;
   2358       }
   2359       break;
   2360 
   2361     case Primitive::kPrimShort:
   2362       switch (input_type) {
   2363         case Primitive::kPrimLong:
   2364           // Type conversion from long to short is a result of code transformations.
   2365         case Primitive::kPrimBoolean:
   2366           // Boolean input is a result of code transformations.
   2367         case Primitive::kPrimByte:
   2368         case Primitive::kPrimInt:
   2369         case Primitive::kPrimChar:
   2370           // Processing a Dex `int-to-short' instruction.
   2371           locations->SetInAt(0, Location::Any());
   2372           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2373           break;
   2374 
   2375         default:
   2376           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2377                      << " to " << result_type;
   2378       }
   2379       break;
   2380 
   2381     case Primitive::kPrimInt:
   2382       switch (input_type) {
   2383         case Primitive::kPrimLong:
   2384           // Processing a Dex `long-to-int' instruction.
   2385           locations->SetInAt(0, Location::Any());
   2386           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2387           break;
   2388 
   2389         case Primitive::kPrimFloat:
   2390           // Processing a Dex `float-to-int' instruction.
   2391           locations->SetInAt(0, Location::RequiresFpuRegister());
   2392           locations->SetOut(Location::RequiresRegister());
   2393           break;
   2394 
   2395         case Primitive::kPrimDouble:
   2396           // Processing a Dex `double-to-int' instruction.
   2397           locations->SetInAt(0, Location::RequiresFpuRegister());
   2398           locations->SetOut(Location::RequiresRegister());
   2399           break;
   2400 
   2401         default:
   2402           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2403                      << " to " << result_type;
   2404       }
   2405       break;
   2406 
   2407     case Primitive::kPrimLong:
   2408       switch (input_type) {
   2409         case Primitive::kPrimBoolean:
   2410           // Boolean input is a result of code transformations.
   2411         case Primitive::kPrimByte:
   2412         case Primitive::kPrimShort:
   2413         case Primitive::kPrimInt:
   2414         case Primitive::kPrimChar:
   2415           // Processing a Dex `int-to-long' instruction.
   2416           // TODO: We would benefit from a (to-be-implemented)
   2417           // Location::RegisterOrStackSlot requirement for this input.
   2418           locations->SetInAt(0, Location::RequiresRegister());
   2419           locations->SetOut(Location::RequiresRegister());
   2420           break;
   2421 
   2422         case Primitive::kPrimFloat:
   2423           // Processing a Dex `float-to-long' instruction.
   2424           locations->SetInAt(0, Location::RequiresFpuRegister());
   2425           locations->SetOut(Location::RequiresRegister());
   2426           break;
   2427 
   2428         case Primitive::kPrimDouble:
   2429           // Processing a Dex `double-to-long' instruction.
   2430           locations->SetInAt(0, Location::RequiresFpuRegister());
   2431           locations->SetOut(Location::RequiresRegister());
   2432           break;
   2433 
   2434         default:
   2435           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2436                      << " to " << result_type;
   2437       }
   2438       break;
   2439 
   2440     case Primitive::kPrimChar:
   2441       switch (input_type) {
   2442         case Primitive::kPrimLong:
   2443           // Type conversion from long to char is a result of code transformations.
   2444         case Primitive::kPrimBoolean:
   2445           // Boolean input is a result of code transformations.
   2446         case Primitive::kPrimByte:
   2447         case Primitive::kPrimShort:
   2448         case Primitive::kPrimInt:
   2449           // Processing a Dex `int-to-char' instruction.
   2450           locations->SetInAt(0, Location::Any());
   2451           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2452           break;
   2453 
   2454         default:
   2455           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2456                      << " to " << result_type;
   2457       }
   2458       break;
   2459 
   2460     case Primitive::kPrimFloat:
   2461       switch (input_type) {
   2462         case Primitive::kPrimBoolean:
   2463           // Boolean input is a result of code transformations.
   2464         case Primitive::kPrimByte:
   2465         case Primitive::kPrimShort:
   2466         case Primitive::kPrimInt:
   2467         case Primitive::kPrimChar:
   2468           // Processing a Dex `int-to-float' instruction.
   2469           locations->SetInAt(0, Location::Any());
   2470           locations->SetOut(Location::RequiresFpuRegister());
   2471           break;
   2472 
   2473         case Primitive::kPrimLong:
   2474           // Processing a Dex `long-to-float' instruction.
   2475           locations->SetInAt(0, Location::Any());
   2476           locations->SetOut(Location::RequiresFpuRegister());
   2477           break;
   2478 
   2479         case Primitive::kPrimDouble:
   2480           // Processing a Dex `double-to-float' instruction.
   2481           locations->SetInAt(0, Location::Any());
   2482           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   2483           break;
   2484 
   2485         default:
   2486           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2487                      << " to " << result_type;
    2488       }
   2489       break;
   2490 
   2491     case Primitive::kPrimDouble:
   2492       switch (input_type) {
   2493         case Primitive::kPrimBoolean:
   2494           // Boolean input is a result of code transformations.
   2495         case Primitive::kPrimByte:
   2496         case Primitive::kPrimShort:
   2497         case Primitive::kPrimInt:
   2498         case Primitive::kPrimChar:
   2499           // Processing a Dex `int-to-double' instruction.
   2500           locations->SetInAt(0, Location::Any());
   2501           locations->SetOut(Location::RequiresFpuRegister());
   2502           break;
   2503 
   2504         case Primitive::kPrimLong:
   2505           // Processing a Dex `long-to-double' instruction.
   2506           locations->SetInAt(0, Location::Any());
   2507           locations->SetOut(Location::RequiresFpuRegister());
   2508           break;
   2509 
   2510         case Primitive::kPrimFloat:
   2511           // Processing a Dex `float-to-double' instruction.
   2512           locations->SetInAt(0, Location::Any());
   2513           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   2514           break;
   2515 
   2516         default:
   2517           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2518                      << " to " << result_type;
   2519       }
   2520       break;
   2521 
   2522     default:
   2523       LOG(FATAL) << "Unexpected type conversion from " << input_type
   2524                  << " to " << result_type;
   2525   }
   2526 }
   2527 
   2528 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
   2529   LocationSummary* locations = conversion->GetLocations();
   2530   Location out = locations->Out();
   2531   Location in = locations->InAt(0);
   2532   Primitive::Type result_type = conversion->GetResultType();
   2533   Primitive::Type input_type = conversion->GetInputType();
   2534   DCHECK_NE(result_type, input_type);
   2535   switch (result_type) {
   2536     case Primitive::kPrimByte:
   2537       switch (input_type) {
   2538         case Primitive::kPrimLong:
   2539           // Type conversion from long to byte is a result of code transformations.
   2540         case Primitive::kPrimBoolean:
   2541           // Boolean input is a result of code transformations.
   2542         case Primitive::kPrimShort:
   2543         case Primitive::kPrimInt:
   2544         case Primitive::kPrimChar:
   2545           // Processing a Dex `int-to-byte' instruction.
   2546           if (in.IsRegister()) {
   2547             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2548           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2549             __ movsxb(out.AsRegister<CpuRegister>(),
   2550                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2551           } else {
   2552             __ movl(out.AsRegister<CpuRegister>(),
   2553                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
   2554           }
   2555           break;
   2556 
   2557         default:
   2558           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2559                      << " to " << result_type;
   2560       }
   2561       break;
   2562 
   2563     case Primitive::kPrimShort:
   2564       switch (input_type) {
   2565         case Primitive::kPrimLong:
   2566           // Type conversion from long to short is a result of code transformations.
   2567         case Primitive::kPrimBoolean:
   2568           // Boolean input is a result of code transformations.
   2569         case Primitive::kPrimByte:
   2570         case Primitive::kPrimInt:
   2571         case Primitive::kPrimChar:
   2572           // Processing a Dex `int-to-short' instruction.
   2573           if (in.IsRegister()) {
   2574             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2575           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2576             __ movsxw(out.AsRegister<CpuRegister>(),
   2577                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2578           } else {
   2579             __ movl(out.AsRegister<CpuRegister>(),
   2580                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
   2581           }
   2582           break;
   2583 
   2584         default:
   2585           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2586                      << " to " << result_type;
   2587       }
   2588       break;
   2589 
   2590     case Primitive::kPrimInt:
   2591       switch (input_type) {
   2592         case Primitive::kPrimLong:
   2593           // Processing a Dex `long-to-int' instruction.
   2594           if (in.IsRegister()) {
   2595             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2596           } else if (in.IsDoubleStackSlot()) {
   2597             __ movl(out.AsRegister<CpuRegister>(),
   2598                     Address(CpuRegister(RSP), in.GetStackIndex()));
   2599           } else {
   2600             DCHECK(in.IsConstant());
   2601             DCHECK(in.GetConstant()->IsLongConstant());
   2602             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
   2603             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
   2604           }
   2605           break;
   2606 
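                 // For the float-to-int and double-to-int conversions below, Java semantics require
                 // saturation: NaN maps to 0, values too large map to kPrimIntMax, and values too
                 // small map to kPrimIntMin. Only the upper bound and NaN need explicit handling;
                 // for inputs below kPrimIntMin the truncating cvttss2si/cvttsd2si already produce
                 // 0x80000000, which is exactly kPrimIntMin.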
   2607         case Primitive::kPrimFloat: {
   2608           // Processing a Dex `float-to-int' instruction.
   2609           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2610           CpuRegister output = out.AsRegister<CpuRegister>();
   2611           NearLabel done, nan;
   2612 
   2613           __ movl(output, Immediate(kPrimIntMax));
   2614           // if input >= (float)INT_MAX goto done
   2615           __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
   2616           __ j(kAboveEqual, &done);
   2617           // if input == NaN goto nan
   2618           __ j(kUnordered, &nan);
   2619           // output = float-to-int-truncate(input)
   2620           __ cvttss2si(output, input, false);
   2621           __ jmp(&done);
   2622           __ Bind(&nan);
   2623           //  output = 0
   2624           __ xorl(output, output);
   2625           __ Bind(&done);
   2626           break;
   2627         }
   2628 
   2629         case Primitive::kPrimDouble: {
   2630           // Processing a Dex `double-to-int' instruction.
   2631           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2632           CpuRegister output = out.AsRegister<CpuRegister>();
   2633           NearLabel done, nan;
   2634 
   2635           __ movl(output, Immediate(kPrimIntMax));
   2636           // if input >= (double)INT_MAX goto done
   2637           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
   2638           __ j(kAboveEqual, &done);
   2639           // if input == NaN goto nan
   2640           __ j(kUnordered, &nan);
   2641           // output = double-to-int-truncate(input)
   2642           __ cvttsd2si(output, input);
   2643           __ jmp(&done);
   2644           __ Bind(&nan);
   2645           //  output = 0
   2646           __ xorl(output, output);
   2647           __ Bind(&done);
   2648           break;
   2649         }
   2650 
   2651         default:
   2652           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2653                      << " to " << result_type;
   2654       }
   2655       break;
   2656 
   2657     case Primitive::kPrimLong:
    2658       DCHECK(out.IsRegister());
    2659       switch (input_type) {
   2660         case Primitive::kPrimBoolean:
   2661           // Boolean input is a result of code transformations.
   2662         case Primitive::kPrimByte:
   2663         case Primitive::kPrimShort:
   2664         case Primitive::kPrimInt:
   2665         case Primitive::kPrimChar:
   2666           // Processing a Dex `int-to-long' instruction.
   2667           DCHECK(in.IsRegister());
   2668           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2669           break;
   2670 
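                 // The float-to-long and double-to-long cases below follow the same saturation
                 // scheme, using kPrimLongMax for the upper bound and the 64-bit truncating
                 // conversions (the trailing 'true' argument selects the 64-bit form), whose
                 // out-of-range result 0x8000000000000000 is exactly kPrimLongMin.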
   2671         case Primitive::kPrimFloat: {
   2672           // Processing a Dex `float-to-long' instruction.
   2673           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2674           CpuRegister output = out.AsRegister<CpuRegister>();
   2675           NearLabel done, nan;
   2676 
   2677           codegen_->Load64BitValue(output, kPrimLongMax);
   2678           // if input >= (float)LONG_MAX goto done
   2679           __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
   2680           __ j(kAboveEqual, &done);
   2681           // if input == NaN goto nan
   2682           __ j(kUnordered, &nan);
   2683           // output = float-to-long-truncate(input)
   2684           __ cvttss2si(output, input, true);
   2685           __ jmp(&done);
   2686           __ Bind(&nan);
   2687           //  output = 0
   2688           __ xorl(output, output);
   2689           __ Bind(&done);
   2690           break;
   2691         }
   2692 
   2693         case Primitive::kPrimDouble: {
   2694           // Processing a Dex `double-to-long' instruction.
   2695           XmmRegister input = in.AsFpuRegister<XmmRegister>();
   2696           CpuRegister output = out.AsRegister<CpuRegister>();
   2697           NearLabel done, nan;
   2698 
   2699           codegen_->Load64BitValue(output, kPrimLongMax);
   2700           // if input >= (double)LONG_MAX goto done
   2701           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
   2702           __ j(kAboveEqual, &done);
   2703           // if input == NaN goto nan
   2704           __ j(kUnordered, &nan);
   2705           // output = double-to-long-truncate(input)
   2706           __ cvttsd2si(output, input, true);
   2707           __ jmp(&done);
   2708           __ Bind(&nan);
   2709           //  output = 0
   2710           __ xorl(output, output);
   2711           __ Bind(&done);
   2712           break;
   2713         }
   2714 
   2715         default:
   2716           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2717                      << " to " << result_type;
   2718       }
   2719       break;
   2720 
   2721     case Primitive::kPrimChar:
   2722       switch (input_type) {
   2723         case Primitive::kPrimLong:
   2724           // Type conversion from long to char is a result of code transformations.
   2725         case Primitive::kPrimBoolean:
   2726           // Boolean input is a result of code transformations.
   2727         case Primitive::kPrimByte:
   2728         case Primitive::kPrimShort:
   2729         case Primitive::kPrimInt:
   2730           // Processing a Dex `int-to-char' instruction.
   2731           if (in.IsRegister()) {
   2732             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
   2733           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
   2734             __ movzxw(out.AsRegister<CpuRegister>(),
   2735                       Address(CpuRegister(RSP), in.GetStackIndex()));
   2736           } else {
   2737             __ movl(out.AsRegister<CpuRegister>(),
   2738                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
   2739           }
   2740           break;
   2741 
   2742         default:
   2743           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2744                      << " to " << result_type;
   2745       }
   2746       break;
   2747 
   2748     case Primitive::kPrimFloat:
   2749       switch (input_type) {
   2750         case Primitive::kPrimBoolean:
   2751           // Boolean input is a result of code transformations.
   2752         case Primitive::kPrimByte:
   2753         case Primitive::kPrimShort:
   2754         case Primitive::kPrimInt:
   2755         case Primitive::kPrimChar:
   2756           // Processing a Dex `int-to-float' instruction.
   2757           if (in.IsRegister()) {
   2758             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
   2759           } else if (in.IsConstant()) {
   2760             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
   2761             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2762             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2763           } else {
   2764             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
   2765                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
   2766           }
   2767           break;
   2768 
   2769         case Primitive::kPrimLong:
   2770           // Processing a Dex `long-to-float' instruction.
   2771           if (in.IsRegister()) {
   2772             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
   2773           } else if (in.IsConstant()) {
   2774             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
   2775             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2776             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2777           } else {
   2778             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
   2779                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
   2780           }
   2781           break;
   2782 
   2783         case Primitive::kPrimDouble:
   2784           // Processing a Dex `double-to-float' instruction.
   2785           if (in.IsFpuRegister()) {
   2786             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
   2787           } else if (in.IsConstant()) {
   2788             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
   2789             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2790             codegen_->Load32BitValue(dest, static_cast<float>(v));
   2791           } else {
   2792             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
   2793                         Address(CpuRegister(RSP), in.GetStackIndex()));
   2794           }
   2795           break;
   2796 
   2797         default:
   2798           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2799                      << " to " << result_type;
    2800       }
   2801       break;
   2802 
   2803     case Primitive::kPrimDouble:
   2804       switch (input_type) {
   2805         case Primitive::kPrimBoolean:
   2806           // Boolean input is a result of code transformations.
   2807         case Primitive::kPrimByte:
   2808         case Primitive::kPrimShort:
   2809         case Primitive::kPrimInt:
   2810         case Primitive::kPrimChar:
   2811           // Processing a Dex `int-to-double' instruction.
   2812           if (in.IsRegister()) {
   2813             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
   2814           } else if (in.IsConstant()) {
   2815             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
   2816             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2817             codegen_->Load64BitValue(dest, static_cast<double>(v));
   2818           } else {
   2819             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
   2820                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
   2821           }
   2822           break;
   2823 
   2824         case Primitive::kPrimLong:
   2825           // Processing a Dex `long-to-double' instruction.
   2826           if (in.IsRegister()) {
   2827             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
   2828           } else if (in.IsConstant()) {
   2829             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
   2830             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2831             codegen_->Load64BitValue(dest, static_cast<double>(v));
   2832           } else {
   2833             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
   2834                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
   2835           }
   2836           break;
   2837 
   2838         case Primitive::kPrimFloat:
   2839           // Processing a Dex `float-to-double' instruction.
   2840           if (in.IsFpuRegister()) {
   2841             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
   2842           } else if (in.IsConstant()) {
   2843             float v = in.GetConstant()->AsFloatConstant()->GetValue();
   2844             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
   2845             codegen_->Load64BitValue(dest, static_cast<double>(v));
   2846           } else {
   2847             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
   2848                         Address(CpuRegister(RSP), in.GetStackIndex()));
   2849           }
   2850           break;
   2851 
   2852         default:
   2853           LOG(FATAL) << "Unexpected type conversion from " << input_type
   2854                      << " to " << result_type;
    2855       }
   2856       break;
   2857 
   2858     default:
   2859       LOG(FATAL) << "Unexpected type conversion from " << input_type
   2860                  << " to " << result_type;
   2861   }
   2862 }
   2863 
   2864 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
   2865   LocationSummary* locations =
   2866       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
   2867   switch (add->GetResultType()) {
   2868     case Primitive::kPrimInt: {
   2869       locations->SetInAt(0, Location::RequiresRegister());
   2870       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
   2871       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2872       break;
   2873     }
   2874 
   2875     case Primitive::kPrimLong: {
   2876       locations->SetInAt(0, Location::RequiresRegister());
   2877       // We can use a leaq or addq if the constant can fit in an immediate.
   2878       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
   2879       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   2880       break;
   2881     }
   2882 
   2883     case Primitive::kPrimDouble:
   2884     case Primitive::kPrimFloat: {
   2885       locations->SetInAt(0, Location::RequiresFpuRegister());
   2886       locations->SetInAt(1, Location::Any());
   2887       locations->SetOut(Location::SameAsFirstInput());
   2888       break;
   2889     }
   2890 
   2891     default:
   2892       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   2893   }
   2894 }
   2895 
   2896 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
   2897   LocationSummary* locations = add->GetLocations();
   2898   Location first = locations->InAt(0);
   2899   Location second = locations->InAt(1);
   2900   Location out = locations->Out();
   2901 
   2902   switch (add->GetResultType()) {
   2903     case Primitive::kPrimInt: {
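               // When the output register differs from both inputs, leal is used as a three-operand
               // add (out = first + second) so that neither input is clobbered; lea also leaves the
               // flags untouched. The long case below uses the same pattern with leaq.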
   2904       if (second.IsRegister()) {
   2905         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   2906           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   2907         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
   2908           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
   2909         } else {
   2910           __ leal(out.AsRegister<CpuRegister>(), Address(
   2911               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
   2912         }
   2913       } else if (second.IsConstant()) {
   2914         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   2915           __ addl(out.AsRegister<CpuRegister>(),
   2916                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
   2917         } else {
   2918           __ leal(out.AsRegister<CpuRegister>(), Address(
   2919               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
   2920         }
   2921       } else {
   2922         DCHECK(first.Equals(locations->Out()));
   2923         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
   2924       }
   2925       break;
   2926     }
   2927 
   2928     case Primitive::kPrimLong: {
   2929       if (second.IsRegister()) {
   2930         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   2931           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   2932         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
   2933           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
   2934         } else {
   2935           __ leaq(out.AsRegister<CpuRegister>(), Address(
   2936               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
   2937         }
   2938       } else {
   2939         DCHECK(second.IsConstant());
   2940         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
   2941         int32_t int32_value = Low32Bits(value);
   2942         DCHECK_EQ(int32_value, value);
   2943         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
   2944           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
   2945         } else {
   2946           __ leaq(out.AsRegister<CpuRegister>(), Address(
   2947               first.AsRegister<CpuRegister>(), int32_value));
   2948         }
   2949       }
   2950       break;
   2951     }
   2952 
   2953     case Primitive::kPrimFloat: {
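               // SSE adds are two-operand, which is why the locations builder requires the output
               // to be the same as the first input. Constant operands are read from the constant
               // area via LiteralFloatAddress/LiteralDoubleAddress.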
   2954       if (second.IsFpuRegister()) {
   2955         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   2956       } else if (second.IsConstant()) {
   2957         __ addss(first.AsFpuRegister<XmmRegister>(),
   2958                  codegen_->LiteralFloatAddress(
   2959                      second.GetConstant()->AsFloatConstant()->GetValue()));
   2960       } else {
   2961         DCHECK(second.IsStackSlot());
   2962         __ addss(first.AsFpuRegister<XmmRegister>(),
   2963                  Address(CpuRegister(RSP), second.GetStackIndex()));
   2964       }
   2965       break;
   2966     }
   2967 
   2968     case Primitive::kPrimDouble: {
   2969       if (second.IsFpuRegister()) {
   2970         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   2971       } else if (second.IsConstant()) {
   2972         __ addsd(first.AsFpuRegister<XmmRegister>(),
   2973                  codegen_->LiteralDoubleAddress(
   2974                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   2975       } else {
   2976         DCHECK(second.IsDoubleStackSlot());
   2977         __ addsd(first.AsFpuRegister<XmmRegister>(),
   2978                  Address(CpuRegister(RSP), second.GetStackIndex()));
   2979       }
   2980       break;
   2981     }
   2982 
   2983     default:
   2984       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   2985   }
   2986 }
   2987 
   2988 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
   2989   LocationSummary* locations =
   2990       new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
   2991   switch (sub->GetResultType()) {
   2992     case Primitive::kPrimInt: {
   2993       locations->SetInAt(0, Location::RequiresRegister());
   2994       locations->SetInAt(1, Location::Any());
   2995       locations->SetOut(Location::SameAsFirstInput());
   2996       break;
   2997     }
   2998     case Primitive::kPrimLong: {
   2999       locations->SetInAt(0, Location::RequiresRegister());
   3000       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
   3001       locations->SetOut(Location::SameAsFirstInput());
   3002       break;
   3003     }
   3004     case Primitive::kPrimFloat:
   3005     case Primitive::kPrimDouble: {
   3006       locations->SetInAt(0, Location::RequiresFpuRegister());
   3007       locations->SetInAt(1, Location::Any());
   3008       locations->SetOut(Location::SameAsFirstInput());
   3009       break;
   3010     }
   3011     default:
   3012       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   3013   }
   3014 }
   3015 
   3016 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
   3017   LocationSummary* locations = sub->GetLocations();
   3018   Location first = locations->InAt(0);
   3019   Location second = locations->InAt(1);
   3020   DCHECK(first.Equals(locations->Out()));
   3021   switch (sub->GetResultType()) {
   3022     case Primitive::kPrimInt: {
   3023       if (second.IsRegister()) {
   3024         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3025       } else if (second.IsConstant()) {
   3026         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
   3027         __ subl(first.AsRegister<CpuRegister>(), imm);
   3028       } else {
   3029         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
   3030       }
   3031       break;
   3032     }
   3033     case Primitive::kPrimLong: {
   3034       if (second.IsConstant()) {
   3035         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
   3036         DCHECK(IsInt<32>(value));
   3037         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
   3038       } else {
   3039         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3040       }
   3041       break;
   3042     }
   3043 
   3044     case Primitive::kPrimFloat: {
   3045       if (second.IsFpuRegister()) {
   3046         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3047       } else if (second.IsConstant()) {
   3048         __ subss(first.AsFpuRegister<XmmRegister>(),
   3049                  codegen_->LiteralFloatAddress(
   3050                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3051       } else {
   3052         DCHECK(second.IsStackSlot());
   3053         __ subss(first.AsFpuRegister<XmmRegister>(),
   3054                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3055       }
   3056       break;
   3057     }
   3058 
   3059     case Primitive::kPrimDouble: {
   3060       if (second.IsFpuRegister()) {
   3061         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3062       } else if (second.IsConstant()) {
   3063         __ subsd(first.AsFpuRegister<XmmRegister>(),
   3064                  codegen_->LiteralDoubleAddress(
   3065                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3066       } else {
   3067         DCHECK(second.IsDoubleStackSlot());
   3068         __ subsd(first.AsFpuRegister<XmmRegister>(),
   3069                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3070       }
   3071       break;
   3072     }
   3073 
   3074     default:
   3075       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   3076   }
   3077 }
   3078 
   3079 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
   3080   LocationSummary* locations =
   3081       new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
   3082   switch (mul->GetResultType()) {
   3083     case Primitive::kPrimInt: {
   3084       locations->SetInAt(0, Location::RequiresRegister());
   3085       locations->SetInAt(1, Location::Any());
   3086       if (mul->InputAt(1)->IsIntConstant()) {
   3087         // Can use 3 operand multiply.
   3088         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3089       } else {
   3090         locations->SetOut(Location::SameAsFirstInput());
   3091       }
   3092       break;
   3093     }
   3094     case Primitive::kPrimLong: {
   3095       locations->SetInAt(0, Location::RequiresRegister());
   3096       locations->SetInAt(1, Location::Any());
   3097       if (mul->InputAt(1)->IsLongConstant() &&
   3098           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
   3099         // Can use 3 operand multiply.
   3100         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   3101       } else {
   3102         locations->SetOut(Location::SameAsFirstInput());
   3103       }
   3104       break;
   3105     }
   3106     case Primitive::kPrimFloat:
   3107     case Primitive::kPrimDouble: {
   3108       locations->SetInAt(0, Location::RequiresFpuRegister());
   3109       locations->SetInAt(1, Location::Any());
   3110       locations->SetOut(Location::SameAsFirstInput());
   3111       break;
   3112     }
   3113 
   3114     default:
   3115       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   3116   }
   3117 }
   3118 
   3119 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
   3120   LocationSummary* locations = mul->GetLocations();
   3121   Location first = locations->InAt(0);
   3122   Location second = locations->InAt(1);
   3123   Location out = locations->Out();
   3124   switch (mul->GetResultType()) {
   3125     case Primitive::kPrimInt:
   3126       // The constant may have ended up in a register, so test explicitly to avoid
   3127       // problems where the output may not be the same as the first operand.
   3128       if (mul->InputAt(1)->IsIntConstant()) {
   3129         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
   3130         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
   3131       } else if (second.IsRegister()) {
   3132         DCHECK(first.Equals(out));
   3133         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3134       } else {
   3135         DCHECK(first.Equals(out));
   3136         DCHECK(second.IsStackSlot());
   3137         __ imull(first.AsRegister<CpuRegister>(),
   3138                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3139       }
   3140       break;
   3141     case Primitive::kPrimLong: {
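               // imulq only accepts a 32-bit sign-extended immediate, so a long constant that does
               // not fit in 32 bits has to be read from the constant area instead.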
   3142       // The constant may have ended up in a register, so test explicitly to avoid
   3143       // problems where the output may not be the same as the first operand.
   3144       if (mul->InputAt(1)->IsLongConstant()) {
   3145         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
   3146         if (IsInt<32>(value)) {
   3147           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
   3148                    Immediate(static_cast<int32_t>(value)));
   3149         } else {
   3150           // Have to use the constant area.
   3151           DCHECK(first.Equals(out));
   3152           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
   3153         }
   3154       } else if (second.IsRegister()) {
   3155         DCHECK(first.Equals(out));
   3156         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   3157       } else {
   3158         DCHECK(second.IsDoubleStackSlot());
   3159         DCHECK(first.Equals(out));
   3160         __ imulq(first.AsRegister<CpuRegister>(),
   3161                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3162       }
   3163       break;
   3164     }
   3165 
   3166     case Primitive::kPrimFloat: {
   3167       DCHECK(first.Equals(out));
   3168       if (second.IsFpuRegister()) {
   3169         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3170       } else if (second.IsConstant()) {
   3171         __ mulss(first.AsFpuRegister<XmmRegister>(),
   3172                  codegen_->LiteralFloatAddress(
   3173                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3174       } else {
   3175         DCHECK(second.IsStackSlot());
   3176         __ mulss(first.AsFpuRegister<XmmRegister>(),
   3177                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3178       }
   3179       break;
   3180     }
   3181 
   3182     case Primitive::kPrimDouble: {
   3183       DCHECK(first.Equals(out));
   3184       if (second.IsFpuRegister()) {
   3185         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3186       } else if (second.IsConstant()) {
   3187         __ mulsd(first.AsFpuRegister<XmmRegister>(),
   3188                  codegen_->LiteralDoubleAddress(
   3189                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3190       } else {
   3191         DCHECK(second.IsDoubleStackSlot());
   3192         __ mulsd(first.AsFpuRegister<XmmRegister>(),
   3193                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3194       }
   3195       break;
   3196     }
   3197 
   3198     default:
   3199       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   3200   }
   3201 }
   3202 
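         // Loads a value onto the x87 FP stack. There is no direct move between XMM registers and
         // the x87 stack, so values that are not already spilled are first written to a stack
         // temporary and then loaded with flds/fldl.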
   3203 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
   3204                                                      uint32_t stack_adjustment, bool is_float) {
   3205   if (source.IsStackSlot()) {
   3206     DCHECK(is_float);
   3207     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
   3208   } else if (source.IsDoubleStackSlot()) {
   3209     DCHECK(!is_float);
   3210     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
   3211   } else {
   3212     // Write the value to the temporary location on the stack and load to FP stack.
   3213     if (is_float) {
   3214       Location stack_temp = Location::StackSlot(temp_offset);
   3215       codegen_->Move(stack_temp, source);
   3216       __ flds(Address(CpuRegister(RSP), temp_offset));
   3217     } else {
   3218       Location stack_temp = Location::DoubleStackSlot(temp_offset);
   3219       codegen_->Move(stack_temp, source);
   3220       __ fldl(Address(CpuRegister(RSP), temp_offset));
   3221     }
   3222   }
   3223 }
   3224 
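         // Generates a floating-point remainder (Java '%') using the x87 unit: SSE has no remainder
         // instruction, and fprem implements the truncated-division remainder (result has the sign
         // of the dividend) that Java requires, unlike the IEEE remainder computed by fprem1.
         // fprem only performs a partial reduction when the exponents are far apart, hence the
         // retry loop on the C2 status flag below.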
   3225 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
   3226   Primitive::Type type = rem->GetResultType();
   3227   bool is_float = type == Primitive::kPrimFloat;
   3228   size_t elem_size = Primitive::ComponentSize(type);
   3229   LocationSummary* locations = rem->GetLocations();
   3230   Location first = locations->InAt(0);
   3231   Location second = locations->InAt(1);
   3232   Location out = locations->Out();
   3233 
   3234   // Create stack space for 2 elements.
   3235   // TODO: enhance register allocator to ask for stack temporaries.
   3236   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
   3237 
   3238   // Load the values to the FP stack in reverse order, using temporaries if needed.
   3239   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
   3240   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
   3241 
   3242   // Loop doing FPREM until we stabilize.
   3243   NearLabel retry;
   3244   __ Bind(&retry);
   3245   __ fprem();
   3246 
   3247   // Move FP status to AX.
   3248   __ fstsw();
   3249 
   3250   // And see if the argument reduction is complete. This is signaled by the
   3251   // C2 FPU flag bit set to 0.
   3252   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
   3253   __ j(kNotEqual, &retry);
   3254 
   3255   // We have settled on the final value. Retrieve it into an XMM register.
   3256   // Store FP top of stack to real stack.
   3257   if (is_float) {
   3258     __ fsts(Address(CpuRegister(RSP), 0));
   3259   } else {
   3260     __ fstl(Address(CpuRegister(RSP), 0));
   3261   }
   3262 
   3263   // Pop the 2 items from the FP stack.
   3264   __ fucompp();
   3265 
   3266   // Load the value from the stack into an XMM register.
   3267   DCHECK(out.IsFpuRegister()) << out;
   3268   if (is_float) {
   3269     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
   3270   } else {
   3271     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
   3272   }
   3273 
   3274   // And remove the temporary stack space we allocated.
   3275   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
   3276 }
   3277 
   3278 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
   3279   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3280 
   3281   LocationSummary* locations = instruction->GetLocations();
   3282   Location second = locations->InAt(1);
   3283   DCHECK(second.IsConstant());
   3284 
   3285   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
   3286   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
   3287   int64_t imm = Int64FromConstant(second.GetConstant());
   3288 
   3289   DCHECK(imm == 1 || imm == -1);
   3290 
   3291   switch (instruction->GetResultType()) {
   3292     case Primitive::kPrimInt: {
   3293       if (instruction->IsRem()) {
   3294         __ xorl(output_register, output_register);
   3295       } else {
   3296         __ movl(output_register, input_register);
   3297         if (imm == -1) {
   3298           __ negl(output_register);
   3299         }
   3300       }
   3301       break;
   3302     }
   3303 
   3304     case Primitive::kPrimLong: {
   3305       if (instruction->IsRem()) {
   3306         __ xorl(output_register, output_register);
   3307       } else {
   3308         __ movq(output_register, input_register);
   3309         if (imm == -1) {
   3310           __ negq(output_register);
   3311         }
   3312       }
   3313       break;
   3314     }
   3315 
   3316     default:
   3317       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
   3318   }
   3319 }
   3320 
   3321 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   3322   LocationSummary* locations = instruction->GetLocations();
   3323   Location second = locations->InAt(1);
   3324 
   3325   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
   3326   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
   3327 
   3328   int64_t imm = Int64FromConstant(second.GetConstant());
   3329   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
   3330   uint64_t abs_imm = AbsOrMin(imm);
   3331 
   3332   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
   3333 
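           // Signed division by a power of two must round toward zero, while an arithmetic shift
           // rounds toward negative infinity. Adding (abs_imm - 1) to negative numerators only (via
           // cmov) fixes this up: e.g. for imm == 8, -9 >> 3 would give -2, but (-9 + 7) >> 3 gives
           // the expected -1. The result is then negated if the divisor is negative.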
   3334   if (instruction->GetResultType() == Primitive::kPrimInt) {
   3335     __ leal(tmp, Address(numerator, abs_imm - 1));
   3336     __ testl(numerator, numerator);
   3337     __ cmov(kGreaterEqual, tmp, numerator);
   3338     int shift = CTZ(imm);
   3339     __ sarl(tmp, Immediate(shift));
   3340 
   3341     if (imm < 0) {
   3342       __ negl(tmp);
   3343     }
   3344 
   3345     __ movl(output_register, tmp);
   3346   } else {
   3347     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
   3348     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
   3349 
   3350     codegen_->Load64BitValue(rdx, abs_imm - 1);
   3351     __ addq(rdx, numerator);
   3352     __ testq(numerator, numerator);
   3353     __ cmov(kGreaterEqual, rdx, numerator);
   3354     int shift = CTZ(imm);
   3355     __ sarq(rdx, Immediate(shift));
   3356 
   3357     if (imm < 0) {
   3358       __ negq(rdx);
   3359     }
   3360 
   3361     __ movq(output_register, rdx);
   3362   }
   3363 }
   3364 
   3365 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
   3366   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3367 
   3368   LocationSummary* locations = instruction->GetLocations();
   3369   Location second = locations->InAt(1);
   3370 
   3371   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
   3372       : locations->GetTemp(0).AsRegister<CpuRegister>();
   3373   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
   3374   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
   3375       : locations->Out().AsRegister<CpuRegister>();
   3376   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   3377 
   3378   DCHECK_EQ(RAX, eax.AsRegister());
   3379   DCHECK_EQ(RDX, edx.AsRegister());
   3380   if (instruction->IsDiv()) {
   3381     DCHECK_EQ(RAX, out.AsRegister());
   3382   } else {
   3383     DCHECK_EQ(RDX, out.AsRegister());
   3384   }
   3385 
   3386   int64_t magic;
   3387   int shift;
   3388 
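           // Division by an arbitrary constant is replaced by a multiplication with a precomputed
           // "magic" reciprocal: quotient = (magic * numerator) >> (word size + shift), with a
           // correction of +/- numerator when the signs of magic and the divisor disagree, and a
           // final "add the sign bit" step so the result rounds toward zero as required for signed
           // division. The remainder, when requested, is recovered as numerator - quotient * imm.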
   3389   // TODO: can these branches be written as one?
   3390   if (instruction->GetResultType() == Primitive::kPrimInt) {
   3391     int imm = second.GetConstant()->AsIntConstant()->GetValue();
   3392 
   3393     CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
   3394 
   3395     __ movl(numerator, eax);
   3396 
   3397     NearLabel no_div;
   3398     NearLabel end;
   3399     __ testl(eax, eax);
   3400     __ j(kNotEqual, &no_div);
   3401 
   3402     __ xorl(out, out);
   3403     __ jmp(&end);
   3404 
   3405     __ Bind(&no_div);
   3406 
   3407     __ movl(eax, Immediate(magic));
   3408     __ imull(numerator);
   3409 
   3410     if (imm > 0 && magic < 0) {
   3411       __ addl(edx, numerator);
   3412     } else if (imm < 0 && magic > 0) {
   3413       __ subl(edx, numerator);
   3414     }
   3415 
   3416     if (shift != 0) {
   3417       __ sarl(edx, Immediate(shift));
   3418     }
   3419 
   3420     __ movl(eax, edx);
   3421     __ shrl(edx, Immediate(31));
   3422     __ addl(edx, eax);
   3423 
   3424     if (instruction->IsRem()) {
   3425       __ movl(eax, numerator);
   3426       __ imull(edx, Immediate(imm));
   3427       __ subl(eax, edx);
   3428       __ movl(edx, eax);
   3429     } else {
   3430       __ movl(eax, edx);
   3431     }
   3432     __ Bind(&end);
   3433   } else {
   3434     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
   3435 
   3436     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
   3437 
   3438     CpuRegister rax = eax;
   3439     CpuRegister rdx = edx;
   3440 
   3441     CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
   3442 
   3443     // Save the numerator.
   3444     __ movq(numerator, rax);
   3445 
   3446     // RAX = magic
   3447     codegen_->Load64BitValue(rax, magic);
   3448 
   3449     // RDX:RAX = magic * numerator
   3450     __ imulq(numerator);
   3451 
   3452     if (imm > 0 && magic < 0) {
   3453       // RDX += numerator
   3454       __ addq(rdx, numerator);
   3455     } else if (imm < 0 && magic > 0) {
   3456       // RDX -= numerator
   3457       __ subq(rdx, numerator);
   3458     }
   3459 
   3460     // Shift if needed.
   3461     if (shift != 0) {
   3462       __ sarq(rdx, Immediate(shift));
   3463     }
   3464 
   3465     // RDX += 1 if RDX < 0
   3466     __ movq(rax, rdx);
   3467     __ shrq(rdx, Immediate(63));
   3468     __ addq(rdx, rax);
   3469 
   3470     if (instruction->IsRem()) {
   3471       __ movq(rax, numerator);
   3472 
   3473       if (IsInt<32>(imm)) {
   3474         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
   3475       } else {
   3476         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
   3477       }
   3478 
   3479       __ subq(rax, rdx);
   3480       __ movq(rdx, rax);
   3481     } else {
   3482       __ movq(rax, rdx);
   3483     }
   3484   }
   3485 }
   3486 
   3487 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   3488   DCHECK(instruction->IsDiv() || instruction->IsRem());
   3489   Primitive::Type type = instruction->GetResultType();
    3490   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
   3491 
   3492   bool is_div = instruction->IsDiv();
   3493   LocationSummary* locations = instruction->GetLocations();
   3494 
   3495   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   3496   Location second = locations->InAt(1);
   3497 
   3498   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
   3499   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
   3500 
   3501   if (second.IsConstant()) {
   3502     int64_t imm = Int64FromConstant(second.GetConstant());
   3503 
   3504     if (imm == 0) {
   3505       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
   3506     } else if (imm == 1 || imm == -1) {
   3507       DivRemOneOrMinusOne(instruction);
   3508     } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
   3509       DivByPowerOfTwo(instruction->AsDiv());
   3510     } else {
   3511       DCHECK(imm <= -2 || imm >= 2);
   3512       GenerateDivRemWithAnyConstant(instruction);
   3513     }
   3514   } else {
   3515     SlowPathCode* slow_path =
   3516         new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
   3517             instruction, out.AsRegister(), type, is_div);
   3518     codegen_->AddSlowPath(slow_path);
   3519 
   3520     CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3521     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
   3522     // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
   3523     // so it's safe to just use negl instead of more complex comparisons.
   3524     if (type == Primitive::kPrimInt) {
   3525       __ cmpl(second_reg, Immediate(-1));
   3526       __ j(kEqual, slow_path->GetEntryLabel());
   3527       // edx:eax <- sign-extended of eax
   3528       __ cdq();
   3529       // eax = quotient, edx = remainder
   3530       __ idivl(second_reg);
   3531     } else {
   3532       __ cmpq(second_reg, Immediate(-1));
   3533       __ j(kEqual, slow_path->GetEntryLabel());
   3534       // rdx:rax <- sign-extended of rax
   3535       __ cqo();
   3536       // rax = quotient, rdx = remainder
   3537       __ idivq(second_reg);
   3538     }
   3539     __ Bind(slow_path->GetExitLabel());
   3540   }
   3541 }
   3542 
   3543 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
   3544   LocationSummary* locations =
   3545       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
   3546   switch (div->GetResultType()) {
   3547     case Primitive::kPrimInt:
   3548     case Primitive::kPrimLong: {
   3549       locations->SetInAt(0, Location::RegisterLocation(RAX));
   3550       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
   3551       locations->SetOut(Location::SameAsFirstInput());
   3552       // Intel uses edx:eax as the dividend.
   3553       locations->AddTemp(Location::RegisterLocation(RDX));
   3554       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
   3555       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
   3556       // output and request another temp.
   3557       if (div->InputAt(1)->IsConstant()) {
   3558         locations->AddTemp(Location::RequiresRegister());
   3559       }
   3560       break;
   3561     }
   3562 
   3563     case Primitive::kPrimFloat:
   3564     case Primitive::kPrimDouble: {
   3565       locations->SetInAt(0, Location::RequiresFpuRegister());
   3566       locations->SetInAt(1, Location::Any());
   3567       locations->SetOut(Location::SameAsFirstInput());
   3568       break;
   3569     }
   3570 
   3571     default:
   3572       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   3573   }
   3574 }
   3575 
   3576 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
   3577   LocationSummary* locations = div->GetLocations();
   3578   Location first = locations->InAt(0);
   3579   Location second = locations->InAt(1);
   3580   DCHECK(first.Equals(locations->Out()));
   3581 
   3582   Primitive::Type type = div->GetResultType();
   3583   switch (type) {
   3584     case Primitive::kPrimInt:
   3585     case Primitive::kPrimLong: {
   3586       GenerateDivRemIntegral(div);
   3587       break;
   3588     }
   3589 
   3590     case Primitive::kPrimFloat: {
   3591       if (second.IsFpuRegister()) {
   3592         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3593       } else if (second.IsConstant()) {
   3594         __ divss(first.AsFpuRegister<XmmRegister>(),
   3595                  codegen_->LiteralFloatAddress(
   3596                      second.GetConstant()->AsFloatConstant()->GetValue()));
   3597       } else {
   3598         DCHECK(second.IsStackSlot());
   3599         __ divss(first.AsFpuRegister<XmmRegister>(),
   3600                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3601       }
   3602       break;
   3603     }
   3604 
   3605     case Primitive::kPrimDouble: {
   3606       if (second.IsFpuRegister()) {
   3607         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
   3608       } else if (second.IsConstant()) {
   3609         __ divsd(first.AsFpuRegister<XmmRegister>(),
   3610                  codegen_->LiteralDoubleAddress(
   3611                      second.GetConstant()->AsDoubleConstant()->GetValue()));
   3612       } else {
   3613         DCHECK(second.IsDoubleStackSlot());
   3614         __ divsd(first.AsFpuRegister<XmmRegister>(),
   3615                  Address(CpuRegister(RSP), second.GetStackIndex()));
   3616       }
   3617       break;
   3618     }
   3619 
   3620     default:
   3621       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   3622   }
   3623 }
   3624 
   3625 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
   3626   Primitive::Type type = rem->GetResultType();
   3627   LocationSummary* locations =
    3628       new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
   3629 
   3630   switch (type) {
   3631     case Primitive::kPrimInt:
   3632     case Primitive::kPrimLong: {
   3633       locations->SetInAt(0, Location::RegisterLocation(RAX));
   3634       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
   3635       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
   3636       locations->SetOut(Location::RegisterLocation(RDX));
    3637       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
   3638       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
   3639       // output and request another temp.
   3640       if (rem->InputAt(1)->IsConstant()) {
   3641         locations->AddTemp(Location::RequiresRegister());
   3642       }
   3643       break;
   3644     }
   3645 
   3646     case Primitive::kPrimFloat:
   3647     case Primitive::kPrimDouble: {
   3648       locations->SetInAt(0, Location::Any());
   3649       locations->SetInAt(1, Location::Any());
   3650       locations->SetOut(Location::RequiresFpuRegister());
   3651       locations->AddTemp(Location::RegisterLocation(RAX));
   3652       break;
   3653     }
   3654 
   3655     default:
   3656       LOG(FATAL) << "Unexpected rem type " << type;
   3657   }
   3658 }
   3659 
   3660 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
   3661   Primitive::Type type = rem->GetResultType();
   3662   switch (type) {
   3663     case Primitive::kPrimInt:
   3664     case Primitive::kPrimLong: {
   3665       GenerateDivRemIntegral(rem);
   3666       break;
   3667     }
   3668     case Primitive::kPrimFloat:
   3669     case Primitive::kPrimDouble: {
   3670       GenerateRemFP(rem);
   3671       break;
   3672     }
   3673     default:
   3674       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
   3675   }
   3676 }
   3677 
   3678 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3679   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
   3680       ? LocationSummary::kCallOnSlowPath
   3681       : LocationSummary::kNoCall;
   3682   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   3683   locations->SetInAt(0, Location::Any());
   3684   if (instruction->HasUses()) {
   3685     locations->SetOut(Location::SameAsFirstInput());
   3686   }
   3687 }
   3688 
   3689 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   3690   SlowPathCode* slow_path =
   3691       new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
   3692   codegen_->AddSlowPath(slow_path);
   3693 
   3694   LocationSummary* locations = instruction->GetLocations();
   3695   Location value = locations->InAt(0);
   3696 
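           // The divisor may be in a register, on the stack, or a constant. Constant divisors are
           // resolved statically: a constant zero becomes an unconditional jump to the slow path
           // and a non-zero constant generates no check at all.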
   3697   switch (instruction->GetType()) {
   3698     case Primitive::kPrimBoolean:
   3699     case Primitive::kPrimByte:
   3700     case Primitive::kPrimChar:
   3701     case Primitive::kPrimShort:
   3702     case Primitive::kPrimInt: {
   3703       if (value.IsRegister()) {
   3704         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
   3705         __ j(kEqual, slow_path->GetEntryLabel());
   3706       } else if (value.IsStackSlot()) {
   3707         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
   3708         __ j(kEqual, slow_path->GetEntryLabel());
   3709       } else {
   3710         DCHECK(value.IsConstant()) << value;
   3711         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
    3712           __ jmp(slow_path->GetEntryLabel());
   3713         }
   3714       }
   3715       break;
   3716     }
   3717     case Primitive::kPrimLong: {
   3718       if (value.IsRegister()) {
   3719         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
   3720         __ j(kEqual, slow_path->GetEntryLabel());
   3721       } else if (value.IsDoubleStackSlot()) {
   3722         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
   3723         __ j(kEqual, slow_path->GetEntryLabel());
   3724       } else {
   3725         DCHECK(value.IsConstant()) << value;
   3726         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
    3727           __ jmp(slow_path->GetEntryLabel());
   3728         }
   3729       }
   3730       break;
   3731     }
   3732     default:
   3733       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
   3734   }
   3735 }
   3736 
   3737 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
   3738   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
   3739 
   3740   LocationSummary* locations =
   3741       new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
   3742 
   3743   switch (op->GetResultType()) {
   3744     case Primitive::kPrimInt:
   3745     case Primitive::kPrimLong: {
   3746       locations->SetInAt(0, Location::RequiresRegister());
    3747       // The shift count needs to be in CL (unless it is a constant).
   3748       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
   3749       locations->SetOut(Location::SameAsFirstInput());
   3750       break;
   3751     }
   3752     default:
   3753       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
   3754   }
   3755 }
   3756 
   3757 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
   3758   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
   3759 
   3760   LocationSummary* locations = op->GetLocations();
   3761   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
   3762   Location second = locations->InAt(1);
   3763 
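           // Constant shift counts are masked to the operand width (mod 32 for int, mod 64 for
           // long), matching Java shift semantics; e.g. an int shift by 35 is emitted as a shift
           // by 3. Register counts need no masking because the hardware applies the same rule
           // to CL.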
   3764   switch (op->GetResultType()) {
   3765     case Primitive::kPrimInt: {
   3766       if (second.IsRegister()) {
   3767         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3768         if (op->IsShl()) {
   3769           __ shll(first_reg, second_reg);
   3770         } else if (op->IsShr()) {
   3771           __ sarl(first_reg, second_reg);
   3772         } else {
   3773           __ shrl(first_reg, second_reg);
   3774         }
   3775       } else {
   3776         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
   3777         if (op->IsShl()) {
   3778           __ shll(first_reg, imm);
   3779         } else if (op->IsShr()) {
   3780           __ sarl(first_reg, imm);
   3781         } else {
   3782           __ shrl(first_reg, imm);
   3783         }
   3784       }
   3785       break;
   3786     }
   3787     case Primitive::kPrimLong: {
   3788       if (second.IsRegister()) {
   3789         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3790         if (op->IsShl()) {
   3791           __ shlq(first_reg, second_reg);
   3792         } else if (op->IsShr()) {
   3793           __ sarq(first_reg, second_reg);
   3794         } else {
   3795           __ shrq(first_reg, second_reg);
   3796         }
   3797       } else {
   3798         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
   3799         if (op->IsShl()) {
   3800           __ shlq(first_reg, imm);
   3801         } else if (op->IsShr()) {
   3802           __ sarq(first_reg, imm);
   3803         } else {
   3804           __ shrq(first_reg, imm);
   3805         }
   3806       }
   3807       break;
   3808     }
   3809     default:
   3810       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
   3811       UNREACHABLE();
   3812   }
   3813 }
   3814 
   3815 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
   3816   LocationSummary* locations =
   3817       new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
   3818 
   3819   switch (ror->GetResultType()) {
   3820     case Primitive::kPrimInt:
   3821     case Primitive::kPrimLong: {
   3822       locations->SetInAt(0, Location::RequiresRegister());
   3823       // The shift count needs to be in CL (unless it is a constant).
   3824       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
   3825       locations->SetOut(Location::SameAsFirstInput());
   3826       break;
   3827     }
   3828     default:
   3829       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
   3830       UNREACHABLE();
   3831   }
   3832 }
   3833 
   3834 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
   3835   LocationSummary* locations = ror->GetLocations();
   3836   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
   3837   Location second = locations->InAt(1);
   3838 
   3839   switch (ror->GetResultType()) {
   3840     case Primitive::kPrimInt:
   3841       if (second.IsRegister()) {
   3842         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3843         __ rorl(first_reg, second_reg);
   3844       } else {
   3845         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
   3846         __ rorl(first_reg, imm);
   3847       }
   3848       break;
   3849     case Primitive::kPrimLong:
   3850       if (second.IsRegister()) {
   3851         CpuRegister second_reg = second.AsRegister<CpuRegister>();
   3852         __ rorq(first_reg, second_reg);
   3853       } else {
   3854         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
   3855         __ rorq(first_reg, imm);
   3856       }
   3857       break;
   3858     default:
   3859       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
   3860       UNREACHABLE();
   3861   }
   3862 }
   3863 
   3864 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
   3865   HandleShift(shl);
   3866 }
   3867 
   3868 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
   3869   HandleShift(shl);
   3870 }
   3871 
   3872 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
   3873   HandleShift(shr);
   3874 }
   3875 
   3876 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
   3877   HandleShift(shr);
   3878 }
   3879 
   3880 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
   3881   HandleShift(ushr);
   3882 }
   3883 
   3884 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
   3885   HandleShift(ushr);
   3886 }
   3887 
   3888 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   3889   LocationSummary* locations =
   3890       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   3891   InvokeRuntimeCallingConvention calling_convention;
   3892   if (instruction->IsStringAlloc()) {
   3893     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
   3894   } else {
   3895     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   3896     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   3897   }
   3898   locations->SetOut(Location::RegisterLocation(RAX));
   3899 }
   3900 
   3901 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
    3902   // Note: if heap poisoning is enabled, the entry point takes care
   3903   // of poisoning the reference.
   3904   if (instruction->IsStringAlloc()) {
   3905     // String is allocated through StringFactory. Call NewEmptyString entry point.
   3906     CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   3907     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
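             // The pNewEmptyString entry in the Thread's entrypoint table holds the resolved
             // StringFactory method; load it GS-relative and call through its quick-compiled
             // code pointer.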
   3908     __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
   3909     __ call(Address(temp, code_offset.SizeValue()));
   3910     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
   3911   } else {
   3912     codegen_->InvokeRuntime(instruction->GetEntrypoint(),
   3913                             instruction,
   3914                             instruction->GetDexPc(),
   3915                             nullptr);
   3916     CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
   3917     DCHECK(!codegen_->IsLeafMethod());
   3918   }
   3919 }
   3920 
   3921 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
   3922   LocationSummary* locations =
   3923       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   3924   InvokeRuntimeCallingConvention calling_convention;
   3925   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   3926   locations->SetOut(Location::RegisterLocation(RAX));
   3927   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   3928   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   3929 }
   3930 
   3931 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
   3932   InvokeRuntimeCallingConvention calling_convention;
   3933   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
   3934                            instruction->GetTypeIndex());
    3935   // Note: if heap poisoning is enabled, the entry point takes care
   3936   // of poisoning the reference.
   3937   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
   3938                           instruction,
   3939                           instruction->GetDexPc(),
   3940                           nullptr);
   3941   CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
   3942 
   3943   DCHECK(!codegen_->IsLeafMethod());
   3944 }
   3945 
   3946 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
   3947   LocationSummary* locations =
   3948       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   3949   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
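           // Stack-passed parameters live in the caller's frame: the calling convention returns
           // caller-relative slots, so rebase them by this method's frame size to make them
           // addressable off RSP.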
   3950   if (location.IsStackSlot()) {
   3951     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   3952   } else if (location.IsDoubleStackSlot()) {
   3953     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   3954   }
   3955   locations->SetOut(location);
   3956 }
   3957 
   3958 void InstructionCodeGeneratorX86_64::VisitParameterValue(
   3959     HParameterValue* instruction ATTRIBUTE_UNUSED) {
   3960   // Nothing to do, the parameter is already at its location.
   3961 }
   3962 
   3963 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
   3964   LocationSummary* locations =
   3965       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   3966   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
   3967 }
   3968 
   3969 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
   3970     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
   3971   // Nothing to do, the method is already at its location.
   3972 }
   3973 
   3974 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   3975   LocationSummary* locations =
   3976       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   3977   locations->SetInAt(0, Location::RequiresRegister());
   3978   locations->SetOut(Location::RequiresRegister());
   3979 }
   3980 
   3981 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   3982   LocationSummary* locations = instruction->GetLocations();
   3983   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
   3984     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
   3985         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
   3986     __ movq(locations->Out().AsRegister<CpuRegister>(),
   3987             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
   3988   } else {
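             // IMT lookup: load the IMT pointer from the class, then read the ArtMethod* at the
             // element offset for this index.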
   3989     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
   3990         instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
   3991     __ movq(locations->Out().AsRegister<CpuRegister>(),
   3992             Address(locations->InAt(0).AsRegister<CpuRegister>(),
   3993             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
   3994     __ movq(locations->Out().AsRegister<CpuRegister>(),
   3995             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
   3996   }
   3997 }
   3998 
   3999 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
   4000   LocationSummary* locations =
   4001       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
   4002   locations->SetInAt(0, Location::RequiresRegister());
   4003   locations->SetOut(Location::SameAsFirstInput());
   4004 }
   4005 
   4006 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
   4007   LocationSummary* locations = not_->GetLocations();
   4008   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
   4009             locations->Out().AsRegister<CpuRegister>().AsRegister());
   4010   Location out = locations->Out();
   4011   switch (not_->GetResultType()) {
   4012     case Primitive::kPrimInt:
   4013       __ notl(out.AsRegister<CpuRegister>());
   4014       break;
   4015 
   4016     case Primitive::kPrimLong:
   4017       __ notq(out.AsRegister<CpuRegister>());
   4018       break;
   4019 
   4020     default:
   4021       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
   4022   }
   4023 }
   4024 
   4025 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
   4026   LocationSummary* locations =
   4027       new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
   4028   locations->SetInAt(0, Location::RequiresRegister());
   4029   locations->SetOut(Location::SameAsFirstInput());
   4030 }
   4031 
   4032 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
   4033   LocationSummary* locations = bool_not->GetLocations();
   4034   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
   4035             locations->Out().AsRegister<CpuRegister>().AsRegister());
   4036   Location out = locations->Out();
   4037   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
   4038 }
   4039 
   4040 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
   4041   LocationSummary* locations =
   4042       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   4043   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
   4044     locations->SetInAt(i, Location::Any());
   4045   }
   4046   locations->SetOut(Location::Any());
   4047 }
   4048 
   4049 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   4050   LOG(FATAL) << "Unimplemented";
   4051 }
   4052 
   4053 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
   4054   /*
    4055    * According to the JSR-133 Cookbook, on x86 only StoreLoad/AnyAny barriers need a memory fence.
   4056    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
   4057    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   4058    */
   4059   switch (kind) {
   4060     case MemBarrierKind::kAnyAny: {
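               // MemoryFence() provides the required StoreLoad fence; on x86-64 this is
               // typically an mfence or an equivalent locked add to the top of the stack
               // (see CodeGeneratorX86_64::MemoryFence for the exact choice).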
   4061       MemoryFence();
   4062       break;
   4063     }
   4064     case MemBarrierKind::kAnyStore:
   4065     case MemBarrierKind::kLoadAny:
   4066     case MemBarrierKind::kStoreStore: {
   4067       // nop
   4068       break;
   4069     }
   4070     default:
   4071       LOG(FATAL) << "Unexpected memory barier " << kind;
   4072   }
   4073 }
   4074 
   4075 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
   4076   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
   4077 
   4078   bool object_field_get_with_read_barrier =
   4079       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   4080   LocationSummary* locations =
   4081       new (GetGraph()->GetArena()) LocationSummary(instruction,
   4082                                                    object_field_get_with_read_barrier ?
   4083                                                        LocationSummary::kCallOnSlowPath :
   4084                                                        LocationSummary::kNoCall);
   4085   locations->SetInAt(0, Location::RequiresRegister());
   4086   if (Primitive::IsFloatingPointType(instruction->GetType())) {
   4087     locations->SetOut(Location::RequiresFpuRegister());
   4088   } else {
   4089     // The output overlaps for an object field get when read barriers
   4090     // are enabled: we do not want the move to overwrite the object's
   4091     // location, as we need it to emit the read barrier.
   4092     locations->SetOut(
   4093         Location::RequiresRegister(),
   4094         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   4095   }
   4096   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
   4097     // We need a temporary register for the read barrier marking slow
   4098     // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
   4099     locations->AddTemp(Location::RequiresRegister());
   4100   }
   4101 }
   4102 
   4103 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
   4104                                                     const FieldInfo& field_info) {
   4105   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
   4106 
   4107   LocationSummary* locations = instruction->GetLocations();
   4108   Location base_loc = locations->InAt(0);
   4109   CpuRegister base = base_loc.AsRegister<CpuRegister>();
   4110   Location out = locations->Out();
   4111   bool is_volatile = field_info.IsVolatile();
   4112   Primitive::Type field_type = field_info.GetFieldType();
   4113   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   4114 
   4115   switch (field_type) {
   4116     case Primitive::kPrimBoolean: {
   4117       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
   4118       break;
   4119     }
   4120 
   4121     case Primitive::kPrimByte: {
   4122       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
   4123       break;
   4124     }
   4125 
   4126     case Primitive::kPrimShort: {
   4127       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
   4128       break;
   4129     }
   4130 
   4131     case Primitive::kPrimChar: {
   4132       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
   4133       break;
   4134     }
   4135 
   4136     case Primitive::kPrimInt: {
   4137       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
   4138       break;
   4139     }
   4140 
   4141     case Primitive::kPrimNot: {
   4142       // /* HeapReference<Object> */ out = *(base + offset)
   4143       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   4144         Location temp_loc = locations->GetTemp(0);
   4145         // Note that a potential implicit null check is handled in this
    4146         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
   4147         codegen_->GenerateFieldLoadWithBakerReadBarrier(
   4148             instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
   4149         if (is_volatile) {
   4150           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4151         }
   4152       } else {
   4153         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
   4154         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4155         if (is_volatile) {
   4156           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4157         }
   4158         // If read barriers are enabled, emit read barriers other than
   4159         // Baker's using a slow path (and also unpoison the loaded
   4160         // reference, if heap poisoning is enabled).
   4161         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
   4162       }
   4163       break;
   4164     }
   4165 
   4166     case Primitive::kPrimLong: {
   4167       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
   4168       break;
   4169     }
   4170 
   4171     case Primitive::kPrimFloat: {
   4172       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
   4173       break;
   4174     }
   4175 
   4176     case Primitive::kPrimDouble: {
   4177       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
   4178       break;
   4179     }
   4180 
   4181     case Primitive::kPrimVoid:
   4182       LOG(FATAL) << "Unreachable type " << field_type;
   4183       UNREACHABLE();
   4184   }
   4185 
   4186   if (field_type == Primitive::kPrimNot) {
   4187     // Potential implicit null checks, in the case of reference
   4188     // fields, are handled in the previous switch statement.
   4189   } else {
   4190     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4191   }
   4192 
   4193   if (is_volatile) {
   4194     if (field_type == Primitive::kPrimNot) {
   4195       // Memory barriers, in the case of references, are also handled
   4196       // in the previous switch statement.
   4197     } else {
   4198       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   4199     }
   4200   }
   4201 }
   4202 
   4203 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
   4204                                             const FieldInfo& field_info) {
   4205   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
   4206 
   4207   LocationSummary* locations =
   4208       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   4209   Primitive::Type field_type = field_info.GetFieldType();
   4210   bool is_volatile = field_info.IsVolatile();
   4211   bool needs_write_barrier =
   4212       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
   4213 
   4214   locations->SetInAt(0, Location::RequiresRegister());
   4215   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
   4216     if (is_volatile) {
   4217       // In order to satisfy the semantics of volatile, this must be a single instruction store.
   4218       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
   4219     } else {
   4220       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
   4221     }
   4222   } else {
   4223     if (is_volatile) {
   4224       // In order to satisfy the semantics of volatile, this must be a single instruction store.
   4225       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
   4226     } else {
   4227       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4228     }
   4229   }
   4230   if (needs_write_barrier) {
   4231     // Temporary registers for the write barrier.
   4232     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
   4233     locations->AddTemp(Location::RequiresRegister());
   4234   } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
   4235     // Temporary register for the reference poisoning.
   4236     locations->AddTemp(Location::RequiresRegister());
   4237   }
   4238 }
   4239 
   4240 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
   4241                                                     const FieldInfo& field_info,
   4242                                                     bool value_can_be_null) {
   4243   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
   4244 
   4245   LocationSummary* locations = instruction->GetLocations();
   4246   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
   4247   Location value = locations->InAt(1);
   4248   bool is_volatile = field_info.IsVolatile();
   4249   Primitive::Type field_type = field_info.GetFieldType();
   4250   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   4251 
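           // Per the JSR-133 cookbook, a volatile store is preceded by an AnyStore barrier and
           // followed by an AnyAny (StoreLoad) barrier; on x86-64 only the trailing one emits
           // an actual fence.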
   4252   if (is_volatile) {
   4253     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   4254   }
   4255 
   4256   bool maybe_record_implicit_null_check_done = false;
   4257 
   4258   switch (field_type) {
   4259     case Primitive::kPrimBoolean:
   4260     case Primitive::kPrimByte: {
   4261       if (value.IsConstant()) {
   4262         int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4263         __ movb(Address(base, offset), Immediate(v));
   4264       } else {
   4265         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
   4266       }
   4267       break;
   4268     }
   4269 
   4270     case Primitive::kPrimShort:
   4271     case Primitive::kPrimChar: {
   4272       if (value.IsConstant()) {
   4273         int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4274         __ movw(Address(base, offset), Immediate(v));
   4275       } else {
   4276         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
   4277       }
   4278       break;
   4279     }
   4280 
   4281     case Primitive::kPrimInt:
   4282     case Primitive::kPrimNot: {
   4283       if (value.IsConstant()) {
   4284         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4285         // `field_type == Primitive::kPrimNot` implies `v == 0`.
   4286         DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
   4287         // Note: if heap poisoning is enabled, no need to poison
   4288         // (negate) `v` if it is a reference, as it would be null.
   4289         __ movl(Address(base, offset), Immediate(v));
   4290       } else {
   4291         if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
   4292           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   4293           __ movl(temp, value.AsRegister<CpuRegister>());
   4294           __ PoisonHeapReference(temp);
   4295           __ movl(Address(base, offset), temp);
   4296         } else {
   4297           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
   4298         }
   4299       }
   4300       break;
   4301     }
   4302 
   4303     case Primitive::kPrimLong: {
   4304       if (value.IsConstant()) {
   4305         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
   4306         codegen_->MoveInt64ToAddress(Address(base, offset),
   4307                                      Address(base, offset + sizeof(int32_t)),
   4308                                      v,
   4309                                      instruction);
   4310         maybe_record_implicit_null_check_done = true;
   4311       } else {
   4312         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
   4313       }
   4314       break;
   4315     }
   4316 
   4317     case Primitive::kPrimFloat: {
   4318       if (value.IsConstant()) {
   4319         int32_t v =
   4320             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
   4321         __ movl(Address(base, offset), Immediate(v));
   4322       } else {
   4323         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
   4324       }
   4325       break;
   4326     }
   4327 
   4328     case Primitive::kPrimDouble: {
   4329       if (value.IsConstant()) {
   4330         int64_t v =
   4331             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
   4332         codegen_->MoveInt64ToAddress(Address(base, offset),
   4333                                      Address(base, offset + sizeof(int32_t)),
   4334                                      v,
   4335                                      instruction);
   4336         maybe_record_implicit_null_check_done = true;
   4337       } else {
   4338         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
   4339       }
   4340       break;
   4341     }
   4342 
   4343     case Primitive::kPrimVoid:
   4344       LOG(FATAL) << "Unreachable type " << field_type;
   4345       UNREACHABLE();
   4346   }
   4347 
   4348   if (!maybe_record_implicit_null_check_done) {
   4349     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4350   }
   4351 
   4352   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
   4353     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   4354     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
   4355     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
   4356   }
   4357 
   4358   if (is_volatile) {
   4359     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   4360   }
   4361 }
   4362 
   4363 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   4364   HandleFieldSet(instruction, instruction->GetFieldInfo());
   4365 }
   4366 
   4367 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   4368   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   4369 }
   4370 
   4371 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   4372   HandleFieldGet(instruction);
   4373 }
   4374 
   4375 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   4376   HandleFieldGet(instruction, instruction->GetFieldInfo());
   4377 }
   4378 
   4379 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   4380   HandleFieldGet(instruction);
   4381 }
   4382 
   4383 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   4384   HandleFieldGet(instruction, instruction->GetFieldInfo());
   4385 }
   4386 
   4387 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   4388   HandleFieldSet(instruction, instruction->GetFieldInfo());
   4389 }
   4390 
   4391 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   4392   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
   4393 }
   4394 
   4395 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
   4396     HUnresolvedInstanceFieldGet* instruction) {
   4397   FieldAccessCallingConventionX86_64 calling_convention;
   4398   codegen_->CreateUnresolvedFieldLocationSummary(
   4399       instruction, instruction->GetFieldType(), calling_convention);
   4400 }
   4401 
   4402 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
   4403     HUnresolvedInstanceFieldGet* instruction) {
   4404   FieldAccessCallingConventionX86_64 calling_convention;
   4405   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4406                                           instruction->GetFieldType(),
   4407                                           instruction->GetFieldIndex(),
   4408                                           instruction->GetDexPc(),
   4409                                           calling_convention);
   4410 }
   4411 
   4412 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
   4413     HUnresolvedInstanceFieldSet* instruction) {
   4414   FieldAccessCallingConventionX86_64 calling_convention;
   4415   codegen_->CreateUnresolvedFieldLocationSummary(
   4416       instruction, instruction->GetFieldType(), calling_convention);
   4417 }
   4418 
   4419 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
   4420     HUnresolvedInstanceFieldSet* instruction) {
   4421   FieldAccessCallingConventionX86_64 calling_convention;
   4422   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4423                                           instruction->GetFieldType(),
   4424                                           instruction->GetFieldIndex(),
   4425                                           instruction->GetDexPc(),
   4426                                           calling_convention);
   4427 }
   4428 
   4429 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
   4430     HUnresolvedStaticFieldGet* instruction) {
   4431   FieldAccessCallingConventionX86_64 calling_convention;
   4432   codegen_->CreateUnresolvedFieldLocationSummary(
   4433       instruction, instruction->GetFieldType(), calling_convention);
   4434 }
   4435 
   4436 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
   4437     HUnresolvedStaticFieldGet* instruction) {
   4438   FieldAccessCallingConventionX86_64 calling_convention;
   4439   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4440                                           instruction->GetFieldType(),
   4441                                           instruction->GetFieldIndex(),
   4442                                           instruction->GetDexPc(),
   4443                                           calling_convention);
   4444 }
   4445 
   4446 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
   4447     HUnresolvedStaticFieldSet* instruction) {
   4448   FieldAccessCallingConventionX86_64 calling_convention;
   4449   codegen_->CreateUnresolvedFieldLocationSummary(
   4450       instruction, instruction->GetFieldType(), calling_convention);
   4451 }
   4452 
   4453 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
   4454     HUnresolvedStaticFieldSet* instruction) {
   4455   FieldAccessCallingConventionX86_64 calling_convention;
   4456   codegen_->GenerateUnresolvedFieldAccess(instruction,
   4457                                           instruction->GetFieldType(),
   4458                                           instruction->GetFieldIndex(),
   4459                                           instruction->GetDexPc(),
   4460                                           calling_convention);
   4461 }
   4462 
   4463 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
   4464   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
   4465       ? LocationSummary::kCallOnSlowPath
   4466       : LocationSummary::kNoCall;
   4467   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   4468   Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
   4469       ? Location::RequiresRegister()
   4470       : Location::Any();
   4471   locations->SetInAt(0, loc);
   4472   if (instruction->HasUses()) {
   4473     locations->SetOut(Location::SameAsFirstInput());
   4474   }
   4475 }
   4476 
   4477 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
   4478   if (CanMoveNullCheckToUser(instruction)) {
   4479     return;
   4480   }
   4481   LocationSummary* locations = instruction->GetLocations();
   4482   Location obj = locations->InAt(0);
   4483 
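           // Touch [obj + 0] with a load: if `obj` is null this faults, and the fault handler
           // turns the fault into a NullPointerException. testl reads the memory operand without
           // writing any register (RAX is an arbitrary choice), clobbering only the flags.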
   4484   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
   4485   RecordPcInfo(instruction, instruction->GetDexPc());
   4486 }
   4487 
   4488 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
   4489   SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
   4490   AddSlowPath(slow_path);
   4491 
   4492   LocationSummary* locations = instruction->GetLocations();
   4493   Location obj = locations->InAt(0);
   4494 
   4495   if (obj.IsRegister()) {
   4496     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
   4497   } else if (obj.IsStackSlot()) {
   4498     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
   4499   } else {
   4500     DCHECK(obj.IsConstant()) << obj;
   4501     DCHECK(obj.GetConstant()->IsNullConstant());
   4502     __ jmp(slow_path->GetEntryLabel());
   4503     return;
   4504   }
   4505   __ j(kEqual, slow_path->GetEntryLabel());
   4506 }
   4507 
   4508 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
   4509   codegen_->GenerateNullCheck(instruction);
   4510 }
   4511 
   4512 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
   4513   bool object_array_get_with_read_barrier =
   4514       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   4515   LocationSummary* locations =
   4516       new (GetGraph()->GetArena()) LocationSummary(instruction,
   4517                                                    object_array_get_with_read_barrier ?
   4518                                                        LocationSummary::kCallOnSlowPath :
   4519                                                        LocationSummary::kNoCall);
   4520   locations->SetInAt(0, Location::RequiresRegister());
   4521   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4522   if (Primitive::IsFloatingPointType(instruction->GetType())) {
   4523     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   4524   } else {
   4525     // The output overlaps for an object array get when read barriers
   4526     // are enabled: we do not want the move to overwrite the array's
   4527     // location, as we need it to emit the read barrier.
   4528     locations->SetOut(
   4529         Location::RequiresRegister(),
   4530         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   4531   }
   4532   // We need a temporary register for the read barrier marking slow
   4533   // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
   4534   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
   4535     locations->AddTemp(Location::RequiresRegister());
   4536   }
   4537 }
   4538 
   4539 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
   4540   LocationSummary* locations = instruction->GetLocations();
   4541   Location obj_loc = locations->InAt(0);
   4542   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   4543   Location index = locations->InAt(1);
   4544   Location out_loc = locations->Out();
   4545 
   4546   Primitive::Type type = instruction->GetType();
   4547   switch (type) {
   4548     case Primitive::kPrimBoolean: {
   4549       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
   4550       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4551       if (index.IsConstant()) {
   4552         __ movzxb(out, Address(obj,
   4553             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
   4554       } else {
   4555         __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
   4556       }
   4557       break;
   4558     }
   4559 
   4560     case Primitive::kPrimByte: {
   4561       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
   4562       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4563       if (index.IsConstant()) {
   4564         __ movsxb(out, Address(obj,
   4565             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
   4566       } else {
   4567         __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
   4568       }
   4569       break;
   4570     }
   4571 
   4572     case Primitive::kPrimShort: {
   4573       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
   4574       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4575       if (index.IsConstant()) {
   4576         __ movsxw(out, Address(obj,
   4577             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
   4578       } else {
   4579         __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
   4580       }
   4581       break;
   4582     }
   4583 
   4584     case Primitive::kPrimChar: {
   4585       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
   4586       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4587       if (index.IsConstant()) {
   4588         __ movzxw(out, Address(obj,
   4589             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
   4590       } else {
   4591         __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
   4592       }
   4593       break;
   4594     }
   4595 
   4596     case Primitive::kPrimInt: {
   4597       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
   4598       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4599       if (index.IsConstant()) {
   4600         __ movl(out, Address(obj,
   4601             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
   4602       } else {
   4603         __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
   4604       }
   4605       break;
   4606     }
   4607 
   4608     case Primitive::kPrimNot: {
   4609       static_assert(
   4610           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
   4611           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   4612       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
   4613       // /* HeapReference<Object> */ out =
   4614       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   4615       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
   4616         Location temp = locations->GetTemp(0);
   4617         // Note that a potential implicit null check is handled in this
    4618         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
   4619         codegen_->GenerateArrayLoadWithBakerReadBarrier(
   4620             instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
   4621       } else {
   4622         CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4623         if (index.IsConstant()) {
   4624           uint32_t offset =
   4625               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
   4626           __ movl(out, Address(obj, offset));
   4627           codegen_->MaybeRecordImplicitNullCheck(instruction);
   4628           // If read barriers are enabled, emit read barriers other than
   4629           // Baker's using a slow path (and also unpoison the loaded
   4630           // reference, if heap poisoning is enabled).
   4631           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
   4632         } else {
   4633           __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
   4634           codegen_->MaybeRecordImplicitNullCheck(instruction);
   4635           // If read barriers are enabled, emit read barriers other than
   4636           // Baker's using a slow path (and also unpoison the loaded
   4637           // reference, if heap poisoning is enabled).
   4638           codegen_->MaybeGenerateReadBarrierSlow(
   4639               instruction, out_loc, out_loc, obj_loc, data_offset, index);
   4640         }
   4641       }
   4642       break;
   4643     }
   4644 
   4645     case Primitive::kPrimLong: {
   4646       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
   4647       CpuRegister out = out_loc.AsRegister<CpuRegister>();
   4648       if (index.IsConstant()) {
   4649         __ movq(out, Address(obj,
   4650             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
   4651       } else {
   4652         __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
   4653       }
   4654       break;
   4655     }
   4656 
   4657     case Primitive::kPrimFloat: {
   4658       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
   4659       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
   4660       if (index.IsConstant()) {
   4661         __ movss(out, Address(obj,
   4662             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
   4663       } else {
   4664         __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
   4665       }
   4666       break;
   4667     }
   4668 
   4669     case Primitive::kPrimDouble: {
   4670       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
   4671       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
   4672       if (index.IsConstant()) {
   4673         __ movsd(out, Address(obj,
   4674             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
   4675       } else {
   4676         __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
   4677       }
   4678       break;
   4679     }
   4680 
   4681     case Primitive::kPrimVoid:
   4682       LOG(FATAL) << "Unreachable type " << type;
   4683       UNREACHABLE();
   4684   }
   4685 
   4686   if (type == Primitive::kPrimNot) {
   4687     // Potential implicit null checks, in the case of reference
   4688     // arrays, are handled in the previous switch statement.
   4689   } else {
   4690     codegen_->MaybeRecordImplicitNullCheck(instruction);
   4691   }
   4692 }
   4693 
   4694 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
   4695   Primitive::Type value_type = instruction->GetComponentType();
   4696 
   4697   bool needs_write_barrier =
   4698       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   4699   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   4700   bool object_array_set_with_read_barrier =
   4701       kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
   4702 
   4703   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
   4704       instruction,
   4705       (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
   4706           LocationSummary::kCallOnSlowPath :
   4707           LocationSummary::kNoCall);
   4708 
   4709   locations->SetInAt(0, Location::RequiresRegister());
   4710   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4711   if (Primitive::IsFloatingPointType(value_type)) {
   4712     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   4713   } else {
   4714     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   4715   }
   4716 
   4717   if (needs_write_barrier) {
   4718     // Temporary registers for the write barrier.
   4719 
   4720     // This first temporary register is possibly used for heap
   4721     // reference poisoning and/or read barrier emission too.
   4722     locations->AddTemp(Location::RequiresRegister());
   4723     locations->AddTemp(Location::RequiresRegister());
   4724   }
   4725 }
   4726 
   4727 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
   4728   LocationSummary* locations = instruction->GetLocations();
   4729   Location array_loc = locations->InAt(0);
   4730   CpuRegister array = array_loc.AsRegister<CpuRegister>();
   4731   Location index = locations->InAt(1);
   4732   Location value = locations->InAt(2);
   4733   Primitive::Type value_type = instruction->GetComponentType();
   4734   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   4735   bool needs_write_barrier =
   4736       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   4737   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   4738   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   4739   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   4740 
   4741   switch (value_type) {
   4742     case Primitive::kPrimBoolean:
   4743     case Primitive::kPrimByte: {
   4744       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
   4745       Address address = index.IsConstant()
   4746           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
   4747           : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
   4748       if (value.IsRegister()) {
   4749         __ movb(address, value.AsRegister<CpuRegister>());
   4750       } else {
   4751         __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
   4752       }
   4753       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4754       break;
   4755     }
   4756 
   4757     case Primitive::kPrimShort:
   4758     case Primitive::kPrimChar: {
   4759       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
   4760       Address address = index.IsConstant()
   4761           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
   4762           : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
   4763       if (value.IsRegister()) {
   4764         __ movw(address, value.AsRegister<CpuRegister>());
   4765       } else {
   4766         DCHECK(value.IsConstant()) << value;
   4767         __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
   4768       }
   4769       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4770       break;
   4771     }
   4772 
   4773     case Primitive::kPrimNot: {
   4774       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
   4775       Address address = index.IsConstant()
   4776           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
   4777           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
   4778 
   4779       if (!value.IsRegister()) {
   4780         // Just setting null.
   4781         DCHECK(instruction->InputAt(2)->IsNullConstant());
   4782         DCHECK(value.IsConstant()) << value;
   4783         __ movl(address, Immediate(0));
   4784         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4785         DCHECK(!needs_write_barrier);
   4786         DCHECK(!may_need_runtime_call_for_type_check);
   4787         break;
   4788       }
   4789 
   4790       DCHECK(needs_write_barrier);
   4791       CpuRegister register_value = value.AsRegister<CpuRegister>();
   4792       NearLabel done, not_null, do_put;
   4793       SlowPathCode* slow_path = nullptr;
   4794       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
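               // If the static type does not guarantee the store is safe, compare the value's
               // class against the array's component type (accepting Object[] arrays as well)
               // and defer to the slow path on mismatch. Null values can always be stored.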
   4795       if (may_need_runtime_call_for_type_check) {
   4796         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
   4797         codegen_->AddSlowPath(slow_path);
   4798         if (instruction->GetValueCanBeNull()) {
   4799           __ testl(register_value, register_value);
   4800           __ j(kNotEqual, &not_null);
   4801           __ movl(address, Immediate(0));
   4802           codegen_->MaybeRecordImplicitNullCheck(instruction);
   4803           __ jmp(&done);
   4804           __ Bind(&not_null);
   4805         }
   4806 
   4807         if (kEmitCompilerReadBarrier) {
   4808           // When read barriers are enabled, the type checking
   4809           // instrumentation requires two read barriers:
   4810           //
   4811           //   __ movl(temp2, temp);
   4812           //   // /* HeapReference<Class> */ temp = temp->component_type_
   4813           //   __ movl(temp, Address(temp, component_offset));
   4814           //   codegen_->GenerateReadBarrierSlow(
   4815           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
   4816           //
   4817           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
   4818           //   __ movl(temp2, Address(register_value, class_offset));
   4819           //   codegen_->GenerateReadBarrierSlow(
   4820           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
   4821           //
   4822           //   __ cmpl(temp, temp2);
   4823           //
   4824           // However, the second read barrier may trash `temp`, as it
   4825           // is a temporary register, and as such would not be saved
   4826           // along with live registers before calling the runtime (nor
   4827           // restored afterwards).  So in this case, we bail out and
   4828           // delegate the work to the array set slow path.
   4829           //
   4830           // TODO: Extend the register allocator to support a new
   4831           // "(locally) live temp" location so as to avoid always
   4832           // going into the slow path when read barriers are enabled.
   4833           __ jmp(slow_path->GetEntryLabel());
   4834         } else {
   4835           // /* HeapReference<Class> */ temp = array->klass_
   4836           __ movl(temp, Address(array, class_offset));
   4837           codegen_->MaybeRecordImplicitNullCheck(instruction);
   4838           __ MaybeUnpoisonHeapReference(temp);
   4839 
   4840           // /* HeapReference<Class> */ temp = temp->component_type_
   4841           __ movl(temp, Address(temp, component_offset));
   4842           // If heap poisoning is enabled, no need to unpoison `temp`
   4843           // nor the object reference in `register_value->klass`, as
   4844           // we are comparing two poisoned references.
   4845           __ cmpl(temp, Address(register_value, class_offset));
   4846 
   4847           if (instruction->StaticTypeOfArrayIsObjectArray()) {
   4848             __ j(kEqual, &do_put);
   4849             // If heap poisoning is enabled, the `temp` reference has
   4850             // not been unpoisoned yet; unpoison it now.
   4851             __ MaybeUnpoisonHeapReference(temp);
   4852 
   4853             // /* HeapReference<Class> */ temp = temp->super_class_
   4854             __ movl(temp, Address(temp, super_offset));
   4855             // If heap poisoning is enabled, no need to unpoison
   4856             // `temp`, as we are comparing against null below.
   4857             __ testl(temp, temp);
   4858             __ j(kNotEqual, slow_path->GetEntryLabel());
   4859             __ Bind(&do_put);
   4860           } else {
   4861             __ j(kNotEqual, slow_path->GetEntryLabel());
   4862           }
   4863         }
   4864       }
   4865 
   4866       if (kPoisonHeapReferences) {
   4867         __ movl(temp, register_value);
   4868         __ PoisonHeapReference(temp);
   4869         __ movl(address, temp);
   4870       } else {
   4871         __ movl(address, register_value);
   4872       }
   4873       if (!may_need_runtime_call_for_type_check) {
   4874         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4875       }
   4876 
   4877       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
   4878       codegen_->MarkGCCard(
   4879           temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
   4880       __ Bind(&done);
   4881 
   4882       if (slow_path != nullptr) {
   4883         __ Bind(slow_path->GetExitLabel());
   4884       }
   4885 
   4886       break;
   4887     }
   4888 
   4889     case Primitive::kPrimInt: {
   4890       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
   4891       Address address = index.IsConstant()
   4892           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
   4893           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
   4894       if (value.IsRegister()) {
   4895         __ movl(address, value.AsRegister<CpuRegister>());
   4896       } else {
   4897         DCHECK(value.IsConstant()) << value;
   4898         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
   4899         __ movl(address, Immediate(v));
   4900       }
   4901       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4902       break;
   4903     }
   4904 
   4905     case Primitive::kPrimLong: {
   4906       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
   4907       Address address = index.IsConstant()
   4908           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
   4909           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
   4910       if (value.IsRegister()) {
   4911         __ movq(address, value.AsRegister<CpuRegister>());
   4912         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4913       } else {
   4914         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
   4915         Address address_high = index.IsConstant()
   4916             ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
   4917                 offset + sizeof(int32_t))
   4918             : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
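                 // MoveInt64ToAddress emits a single movq when the constant fits in a
                 // sign-extended 32-bit immediate; otherwise it stores the value as two
                 // 32-bit halves, the low half at `address` and the high half at
                 // `address_high`.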
   4919         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
   4920       }
   4921       break;
   4922     }
   4923 
   4924     case Primitive::kPrimFloat: {
   4925       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
   4926       Address address = index.IsConstant()
   4927           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
   4928           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
   4929       if (value.IsFpuRegister()) {
   4930         __ movss(address, value.AsFpuRegister<XmmRegister>());
   4931       } else {
   4932         DCHECK(value.IsConstant());
   4933         int32_t v =
   4934             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
   4935         __ movl(address, Immediate(v));
   4936       }
   4937       codegen_->MaybeRecordImplicitNullCheck(instruction);
   4938       break;
   4939     }
   4940 
   4941     case Primitive::kPrimDouble: {
   4942       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
   4943       Address address = index.IsConstant()
   4944           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
   4945           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
   4946       if (value.IsFpuRegister()) {
   4947         __ movsd(address, value.AsFpuRegister<XmmRegister>());
   4948         codegen_->MaybeRecordImplicitNullCheck(instruction);
   4949       } else {
   4950         int64_t v =
   4951             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
   4952         Address address_high = index.IsConstant()
   4953             ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
   4954                 offset + sizeof(int32_t))
   4955             : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
   4956         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
   4957       }
   4958       break;
   4959     }
   4960 
   4961     case Primitive::kPrimVoid:
   4962       LOG(FATAL) << "Unreachable type " << instruction->GetType();
   4963       UNREACHABLE();
   4964   }
   4965 }
   4966 
   4967 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
   4968   LocationSummary* locations =
   4969       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   4970   locations->SetInAt(0, Location::RequiresRegister());
   4971   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   4972 }
   4973 
   4974 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
   4975   LocationSummary* locations = instruction->GetLocations();
   4976   uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
   4977   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   4978   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   4979   __ movl(out, Address(obj, offset));
   4980   codegen_->MaybeRecordImplicitNullCheck(instruction);
   4981 }
   4982 
   4983 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
   4984   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
   4985       ? LocationSummary::kCallOnSlowPath
   4986       : LocationSummary::kNoCall;
   4987   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   4988   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   4989   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   4990   if (instruction->HasUses()) {
   4991     locations->SetOut(Location::SameAsFirstInput());
   4992   }
   4993 }
   4994 
   4995 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
   4996   LocationSummary* locations = instruction->GetLocations();
   4997   Location index_loc = locations->InAt(0);
   4998   Location length_loc = locations->InAt(1);
   4999   SlowPathCode* slow_path =
   5000       new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
   5001 
   5002   if (length_loc.IsConstant()) {
   5003     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
   5004     if (index_loc.IsConstant()) {
    5005       // BCE will remove the bounds check if we are guaranteed to pass.
   5006       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
   5007       if (index < 0 || index >= length) {
   5008         codegen_->AddSlowPath(slow_path);
   5009         __ jmp(slow_path->GetEntryLabel());
   5010       } else {
    5011         // Some optimization after BCE may have generated this, and we should
    5012         // not generate a bounds check if the index is known to be in range.
   5013       }
   5014       return;
   5015     }
   5016 
   5017     // We have to reverse the jump condition because the length is the constant.
   5018     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
   5019     __ cmpl(index_reg, Immediate(length));
   5020     codegen_->AddSlowPath(slow_path);
   5021     __ j(kAboveEqual, slow_path->GetEntryLabel());
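             // kAboveEqual is an unsigned comparison, so a negative index (a huge value
             // when interpreted as unsigned) also takes the slow path; one branch thus
             // covers both `index < 0` and `index >= length`.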
   5022   } else {
   5023     CpuRegister length = length_loc.AsRegister<CpuRegister>();
   5024     if (index_loc.IsConstant()) {
   5025       int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
   5026       __ cmpl(length, Immediate(value));
   5027     } else {
   5028       __ cmpl(length, index_loc.AsRegister<CpuRegister>());
   5029     }
   5030     codegen_->AddSlowPath(slow_path);
   5031     __ j(kBelowEqual, slow_path->GetEntryLabel());
   5032   }
   5033 }
   5034 
   5035 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
   5036                                      CpuRegister card,
   5037                                      CpuRegister object,
   5038                                      CpuRegister value,
   5039                                      bool value_can_be_null) {
   5040   NearLabel is_null;
   5041   if (value_can_be_null) {
   5042     __ testl(value, value);
   5043     __ j(kEqual, &is_null);
   5044   }
   5045   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
   5046                                         /* no_rip */ true));
   5047   __ movq(temp, object);
   5048   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
   5049   __ movb(Address(temp, card, TIMES_1, 0), card);
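           // The byte store above amounts to card_table[object >> kCardShift] = dirty:
           // `card` holds the biased card table base read from thread-local storage,
           // and the bias is chosen so that the low byte of that base is the dirty
           // card value.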
   5050   if (value_can_be_null) {
   5051     __ Bind(&is_null);
   5052   }
   5053 }
   5054 
   5055 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   5056   LOG(FATAL) << "Unimplemented";
   5057 }
   5058 
   5059 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
   5060   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
   5061 }
   5062 
   5063 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5064   new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
   5065 }
   5066 
   5067 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   5068   HBasicBlock* block = instruction->GetBlock();
   5069   if (block->GetLoopInformation() != nullptr) {
   5070     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
   5071     // The back edge will generate the suspend check.
   5072     return;
   5073   }
   5074   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
   5075     // The goto will generate the suspend check.
   5076     return;
   5077   }
   5078   GenerateSuspendCheck(instruction, nullptr);
   5079 }
   5080 
   5081 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
   5082                                                           HBasicBlock* successor) {
   5083   SuspendCheckSlowPathX86_64* slow_path =
   5084       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
   5085   if (slow_path == nullptr) {
   5086     slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
   5087     instruction->SetSlowPath(slow_path);
   5088     codegen_->AddSlowPath(slow_path);
   5089     if (successor != nullptr) {
   5090       DCHECK(successor->IsLoopHeader());
   5091       codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
   5092     }
   5093   } else {
   5094     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   5095   }
   5096 
   5097   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
   5098                                   /* no_rip */ true),
   5099                 Immediate(0));
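           // A non-zero flags field means a suspend or checkpoint request is pending
           // for the current thread.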
   5100   if (successor == nullptr) {
   5101     __ j(kNotEqual, slow_path->GetEntryLabel());
   5102     __ Bind(slow_path->GetReturnLabel());
   5103   } else {
   5104     __ j(kEqual, codegen_->GetLabelOf(successor));
   5105     __ jmp(slow_path->GetEntryLabel());
   5106   }
   5107 }
   5108 
   5109 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
   5110   return codegen_->GetAssembler();
   5111 }
   5112 
   5113 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
   5114   MoveOperands* move = moves_[index];
   5115   Location source = move->GetSource();
   5116   Location destination = move->GetDestination();
   5117 
   5118   if (source.IsRegister()) {
   5119     if (destination.IsRegister()) {
   5120       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
   5121     } else if (destination.IsStackSlot()) {
   5122       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5123               source.AsRegister<CpuRegister>());
   5124     } else {
   5125       DCHECK(destination.IsDoubleStackSlot());
   5126       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5127               source.AsRegister<CpuRegister>());
   5128     }
   5129   } else if (source.IsStackSlot()) {
   5130     if (destination.IsRegister()) {
   5131       __ movl(destination.AsRegister<CpuRegister>(),
   5132               Address(CpuRegister(RSP), source.GetStackIndex()));
   5133     } else if (destination.IsFpuRegister()) {
   5134       __ movss(destination.AsFpuRegister<XmmRegister>(),
   5135               Address(CpuRegister(RSP), source.GetStackIndex()));
   5136     } else {
   5137       DCHECK(destination.IsStackSlot());
   5138       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   5139       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   5140     }
   5141   } else if (source.IsDoubleStackSlot()) {
   5142     if (destination.IsRegister()) {
   5143       __ movq(destination.AsRegister<CpuRegister>(),
   5144               Address(CpuRegister(RSP), source.GetStackIndex()));
   5145     } else if (destination.IsFpuRegister()) {
   5146       __ movsd(destination.AsFpuRegister<XmmRegister>(),
   5147                Address(CpuRegister(RSP), source.GetStackIndex()));
   5148     } else {
   5149       DCHECK(destination.IsDoubleStackSlot()) << destination;
   5150       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
   5151       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   5152     }
   5153   } else if (source.IsConstant()) {
   5154     HConstant* constant = source.GetConstant();
   5155     if (constant->IsIntConstant() || constant->IsNullConstant()) {
   5156       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
   5157       if (destination.IsRegister()) {
   5158         if (value == 0) {
   5159           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
   5160         } else {
   5161           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
   5162         }
   5163       } else {
   5164         DCHECK(destination.IsStackSlot()) << destination;
   5165         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
   5166       }
   5167     } else if (constant->IsLongConstant()) {
   5168       int64_t value = constant->AsLongConstant()->GetValue();
   5169       if (destination.IsRegister()) {
   5170         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
   5171       } else {
   5172         DCHECK(destination.IsDoubleStackSlot()) << destination;
   5173         codegen_->Store64BitValueToStack(destination, value);
   5174       }
   5175     } else if (constant->IsFloatConstant()) {
   5176       float fp_value = constant->AsFloatConstant()->GetValue();
   5177       if (destination.IsFpuRegister()) {
   5178         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   5179         codegen_->Load32BitValue(dest, fp_value);
   5180       } else {
   5181         DCHECK(destination.IsStackSlot()) << destination;
   5182         Immediate imm(bit_cast<int32_t, float>(fp_value));
   5183         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
   5184       }
   5185     } else {
   5186       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
    5187       double fp_value = constant->AsDoubleConstant()->GetValue();
   5188       int64_t value = bit_cast<int64_t, double>(fp_value);
   5189       if (destination.IsFpuRegister()) {
   5190         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
   5191         codegen_->Load64BitValue(dest, fp_value);
   5192       } else {
   5193         DCHECK(destination.IsDoubleStackSlot()) << destination;
   5194         codegen_->Store64BitValueToStack(destination, value);
   5195       }
   5196     }
   5197   } else if (source.IsFpuRegister()) {
   5198     if (destination.IsFpuRegister()) {
   5199       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
   5200     } else if (destination.IsStackSlot()) {
   5201       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5202                source.AsFpuRegister<XmmRegister>());
   5203     } else {
   5204       DCHECK(destination.IsDoubleStackSlot()) << destination;
   5205       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
   5206                source.AsFpuRegister<XmmRegister>());
   5207     }
   5208   }
   5209 }
   5210 
   5211 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
   5212   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5213   __ movl(Address(CpuRegister(RSP), mem), reg);
   5214   __ movl(reg, CpuRegister(TMP));
   5215 }
   5216 
   5217 void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
   5218   ScratchRegisterScope ensure_scratch(
   5219       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
   5220 
   5221   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
   5222   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
   5223   __ movl(CpuRegister(ensure_scratch.GetRegister()),
   5224           Address(CpuRegister(RSP), mem2 + stack_offset));
   5225   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
   5226   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
   5227           CpuRegister(ensure_scratch.GetRegister()));
   5228 }
   5229 
   5230 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
   5231   __ movq(CpuRegister(TMP), reg1);
   5232   __ movq(reg1, reg2);
   5233   __ movq(reg2, CpuRegister(TMP));
   5234 }
   5235 
   5236 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
   5237   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5238   __ movq(Address(CpuRegister(RSP), mem), reg);
   5239   __ movq(reg, CpuRegister(TMP));
   5240 }
   5241 
   5242 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
   5243   ScratchRegisterScope ensure_scratch(
   5244       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
   5245 
   5246   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
   5247   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
   5248   __ movq(CpuRegister(ensure_scratch.GetRegister()),
   5249           Address(CpuRegister(RSP), mem2 + stack_offset));
   5250   __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
   5251   __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
   5252           CpuRegister(ensure_scratch.GetRegister()));
   5253 }
   5254 
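         // There is no instruction that exchanges an XMM register with memory, so the
         // two helpers below park the memory operand in TMP, store the XMM register to
         // memory, and then move TMP into the XMM register.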
   5255 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
   5256   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5257   __ movss(Address(CpuRegister(RSP), mem), reg);
   5258   __ movd(reg, CpuRegister(TMP));
   5259 }
   5260 
   5261 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
   5262   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   5263   __ movsd(Address(CpuRegister(RSP), mem), reg);
   5264   __ movd(reg, CpuRegister(TMP));
   5265 }
   5266 
   5267 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   5268   MoveOperands* move = moves_[index];
   5269   Location source = move->GetSource();
   5270   Location destination = move->GetDestination();
   5271 
   5272   if (source.IsRegister() && destination.IsRegister()) {
   5273     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
   5274   } else if (source.IsRegister() && destination.IsStackSlot()) {
   5275     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   5276   } else if (source.IsStackSlot() && destination.IsRegister()) {
   5277     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   5278   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
   5279     Exchange32(destination.GetStackIndex(), source.GetStackIndex());
   5280   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
   5281     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   5282   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
   5283     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   5284   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
   5285     Exchange64(destination.GetStackIndex(), source.GetStackIndex());
   5286   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
   5287     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
   5288     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
   5289     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
   5290   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
   5291     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   5292   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
   5293     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   5294   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
   5295     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   5296   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
   5297     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   5298   } else {
   5299     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
   5300   }
   5301 }
   5302 
   5303 
   5304 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
   5305   __ pushq(CpuRegister(reg));
   5306 }
   5307 
   5308 
   5309 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
   5310   __ popq(CpuRegister(reg));
   5311 }
   5312 
   5313 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
   5314     SlowPathCode* slow_path, CpuRegister class_reg) {
    5315   __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
   5316           Immediate(mirror::Class::kStatusInitialized));
   5317   __ j(kLess, slow_path->GetEntryLabel());
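           // mirror::Class::Status is ordered so that kStatusInitialized is above all
           // uninitialized, initializing and error states, so this single signed
           // comparison routes every not-fully-initialized class to the slow path.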
   5318   __ Bind(slow_path->GetExitLabel());
   5319   // No need for memory fence, thanks to the x86-64 memory model.
   5320 }
   5321 
   5322 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
   5323   InvokeRuntimeCallingConvention calling_convention;
   5324   CodeGenerator::CreateLoadClassLocationSummary(
   5325       cls,
   5326       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
   5327       Location::RegisterLocation(RAX),
   5328       /* code_generator_supports_read_barrier */ true);
   5329 }
   5330 
   5331 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
   5332   LocationSummary* locations = cls->GetLocations();
   5333   if (cls->NeedsAccessCheck()) {
   5334     codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
   5335     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
   5336                             cls,
   5337                             cls->GetDexPc(),
   5338                             nullptr);
   5339     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
   5340     return;
   5341   }
   5342 
   5343   Location out_loc = locations->Out();
   5344   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5345   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
   5346 
   5347   if (cls->IsReferrersClass()) {
   5348     DCHECK(!cls->CanCallRuntime());
   5349     DCHECK(!cls->MustGenerateClinitCheck());
   5350     // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
   5351     GenerateGcRootFieldLoad(
   5352         cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
   5353   } else {
   5354     // /* GcRoot<mirror::Class>[] */ out =
   5355     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
   5356     __ movq(out, Address(current_method,
   5357                          ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
   5358     // /* GcRoot<mirror::Class> */ out = out[type_index]
   5359     GenerateGcRootFieldLoad(
   5360         cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
   5361 
   5362     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
   5363       DCHECK(cls->CanCallRuntime());
   5364       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
   5365           cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
   5366       codegen_->AddSlowPath(slow_path);
   5367       if (!cls->IsInDexCache()) {
   5368         __ testl(out, out);
   5369         __ j(kEqual, slow_path->GetEntryLabel());
   5370       }
   5371       if (cls->MustGenerateClinitCheck()) {
   5372         GenerateClassInitializationCheck(slow_path, out);
   5373       } else {
   5374         __ Bind(slow_path->GetExitLabel());
   5375       }
   5376     }
   5377   }
   5378 }
   5379 
   5380 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
   5381   LocationSummary* locations =
   5382       new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
   5383   locations->SetInAt(0, Location::RequiresRegister());
   5384   if (check->HasUses()) {
   5385     locations->SetOut(Location::SameAsFirstInput());
   5386   }
   5387 }
   5388 
   5389 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
    5390   // We assume the class is not null.
   5391   SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
   5392       check->GetLoadClass(), check, check->GetDexPc(), true);
   5393   codegen_->AddSlowPath(slow_path);
   5394   GenerateClassInitializationCheck(slow_path,
   5395                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
   5396 }
   5397 
   5398 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
   5399     HLoadString::LoadKind desired_string_load_kind) {
   5400   if (kEmitCompilerReadBarrier) {
   5401     switch (desired_string_load_kind) {
   5402       case HLoadString::LoadKind::kBootImageLinkTimeAddress:
   5403       case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
   5404       case HLoadString::LoadKind::kBootImageAddress:
   5405         // TODO: Implement for read barrier.
   5406         return HLoadString::LoadKind::kDexCacheViaMethod;
   5407       default:
   5408         break;
   5409     }
   5410   }
   5411   switch (desired_string_load_kind) {
   5412     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
   5413       DCHECK(!GetCompilerOptions().GetCompilePic());
   5414       // We prefer the always-available RIP-relative address for the x86-64 boot image.
   5415       return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
   5416     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
   5417       DCHECK(GetCompilerOptions().GetCompilePic());
   5418       break;
   5419     case HLoadString::LoadKind::kBootImageAddress:
   5420       break;
   5421     case HLoadString::LoadKind::kDexCacheAddress:
   5422       DCHECK(Runtime::Current()->UseJitCompilation());
   5423       break;
   5424     case HLoadString::LoadKind::kDexCachePcRelative:
   5425       DCHECK(!Runtime::Current()->UseJitCompilation());
   5426       break;
   5427     case HLoadString::LoadKind::kDexCacheViaMethod:
   5428       break;
   5429   }
   5430   return desired_string_load_kind;
   5431 }
   5432 
   5433 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
   5434   LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
   5435       ? LocationSummary::kCallOnSlowPath
   5436       : LocationSummary::kNoCall;
   5437   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   5438   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
   5439     locations->SetInAt(0, Location::RequiresRegister());
   5440   }
   5441   locations->SetOut(Location::RequiresRegister());
   5442 }
   5443 
   5444 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
   5445   LocationSummary* locations = load->GetLocations();
   5446   Location out_loc = locations->Out();
   5447   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5448 
   5449   switch (load->GetLoadKind()) {
   5450     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
   5451       DCHECK(!kEmitCompilerReadBarrier);
   5452       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
   5453       codegen_->RecordStringPatch(load);
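               // The 32-bit displacement emitted by the leal above is a placeholder
               // (kDummy32BitOffset); the patch recorded here is resolved at link time
               // to the RIP-relative address of the string in the boot image.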
   5454       return;  // No dex cache slow path.
   5455     }
   5456     case HLoadString::LoadKind::kBootImageAddress: {
   5457       DCHECK(!kEmitCompilerReadBarrier);
   5458       DCHECK_NE(load->GetAddress(), 0u);
   5459       uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
   5460       __ movl(out, Immediate(address));  // Zero-extended.
   5461       codegen_->RecordSimplePatch();
   5462       return;  // No dex cache slow path.
   5463     }
   5464     case HLoadString::LoadKind::kDexCacheAddress: {
   5465       DCHECK_NE(load->GetAddress(), 0u);
   5466       if (IsUint<32>(load->GetAddress())) {
   5467         Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
   5468         GenerateGcRootFieldLoad(load, out_loc, address);
   5469       } else {
   5470         // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
   5471         __ movq(out, Immediate(load->GetAddress()));
   5472         GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
   5473       }
   5474       break;
   5475     }
   5476     case HLoadString::LoadKind::kDexCachePcRelative: {
   5477       uint32_t offset = load->GetDexCacheElementOffset();
   5478       Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
   5479       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
   5480                                           /* no_rip */ false);
   5481       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
   5482       break;
   5483     }
   5484     case HLoadString::LoadKind::kDexCacheViaMethod: {
   5485       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
   5486 
   5487       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
   5488       GenerateGcRootFieldLoad(
   5489           load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
   5490       // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   5491       __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
   5492       // /* GcRoot<mirror::String> */ out = out[string_index]
   5493       GenerateGcRootFieldLoad(
   5494           load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
   5495       break;
   5496     }
   5497     default:
   5498       LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
   5499       UNREACHABLE();
   5500   }
   5501 
   5502   if (!load->IsInDexCache()) {
   5503     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
   5504     codegen_->AddSlowPath(slow_path);
   5505     __ testl(out, out);
   5506     __ j(kEqual, slow_path->GetEntryLabel());
   5507     __ Bind(slow_path->GetExitLabel());
   5508   }
   5509 }
   5510 
   5511 static Address GetExceptionTlsAddress() {
   5512   return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
   5513                            /* no_rip */ true);
   5514 }
   5515 
   5516 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
   5517   LocationSummary* locations =
   5518       new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
   5519   locations->SetOut(Location::RequiresRegister());
   5520 }
   5521 
   5522 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
   5523   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
   5524 }
   5525 
   5526 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
   5527   new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
   5528 }
   5529 
   5530 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
   5531   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
   5532 }
   5533 
   5534 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
   5535   LocationSummary* locations =
   5536       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   5537   InvokeRuntimeCallingConvention calling_convention;
   5538   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   5539 }
   5540 
   5541 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
   5542   codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
   5543                           instruction,
   5544                           instruction->GetDexPc(),
   5545                           nullptr);
   5546   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
   5547 }
   5548 
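         // When read barriers are enabled, the reference loads performed by the
         // instanceof and check-cast code below may need a scratch register (for
         // instance to keep a reference alive while the barrier computes the
         // reference to use); this predicate names the check kinds that request
         // that extra temporary.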
   5549 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   5550   return kEmitCompilerReadBarrier &&
   5551       (kUseBakerReadBarrier ||
   5552        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   5553        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   5554        type_check_kind == TypeCheckKind::kArrayObjectCheck);
   5555 }
   5556 
   5557 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   5558   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   5559   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   5560   switch (type_check_kind) {
   5561     case TypeCheckKind::kExactCheck:
   5562     case TypeCheckKind::kAbstractClassCheck:
   5563     case TypeCheckKind::kClassHierarchyCheck:
   5564     case TypeCheckKind::kArrayObjectCheck:
   5565       call_kind =
   5566           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
   5567       break;
   5568     case TypeCheckKind::kArrayCheck:
   5569     case TypeCheckKind::kUnresolvedCheck:
   5570     case TypeCheckKind::kInterfaceCheck:
   5571       call_kind = LocationSummary::kCallOnSlowPath;
   5572       break;
   5573   }
   5574 
   5575   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   5576   locations->SetInAt(0, Location::RequiresRegister());
   5577   locations->SetInAt(1, Location::Any());
   5578   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
   5579   locations->SetOut(Location::RequiresRegister());
   5580   // When read barriers are enabled, we need a temporary register for
   5581   // some cases.
   5582   if (TypeCheckNeedsATemporary(type_check_kind)) {
   5583     locations->AddTemp(Location::RequiresRegister());
   5584   }
   5585 }
   5586 
   5587 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   5588   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   5589   LocationSummary* locations = instruction->GetLocations();
   5590   Location obj_loc = locations->InAt(0);
   5591   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   5592   Location cls = locations->InAt(1);
    5593   Location out_loc = locations->Out();
   5594   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   5595   Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
   5596       locations->GetTemp(0) :
   5597       Location::NoLocation();
   5598   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   5599   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   5600   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   5601   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   5602   SlowPathCode* slow_path = nullptr;
   5603   NearLabel done, zero;
   5604 
   5605   // Return 0 if `obj` is null.
   5606   // Avoid null check if we know obj is not null.
   5607   if (instruction->MustDoNullCheck()) {
   5608     __ testl(obj, obj);
   5609     __ j(kEqual, &zero);
   5610   }
   5611 
   5612   // /* HeapReference<Class> */ out = obj->klass_
   5613   GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
   5614 
   5615   switch (type_check_kind) {
   5616     case TypeCheckKind::kExactCheck: {
   5617       if (cls.IsRegister()) {
   5618         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5619       } else {
   5620         DCHECK(cls.IsStackSlot()) << cls;
   5621         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5622       }
   5623       if (zero.IsLinked()) {
   5624         // Classes must be equal for the instanceof to succeed.
   5625         __ j(kNotEqual, &zero);
   5626         __ movl(out, Immediate(1));
   5627         __ jmp(&done);
   5628       } else {
   5629         __ setcc(kEqual, out);
   5630         // setcc only sets the low byte.
   5631         __ andl(out, Immediate(1));
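                 // setcc writes only the low byte of `out`; the andl clears the
                 // remaining bits so the result is exactly 0 or 1.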
   5632       }
   5633       break;
   5634     }
   5635 
   5636     case TypeCheckKind::kAbstractClassCheck: {
   5637       // If the class is abstract, we eagerly fetch the super class of the
   5638       // object to avoid doing a comparison we know will fail.
   5639       NearLabel loop, success;
   5640       __ Bind(&loop);
   5641       // /* HeapReference<Class> */ out = out->super_class_
   5642       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
   5643       __ testl(out, out);
   5644       // If `out` is null, we use it for the result, and jump to `done`.
   5645       __ j(kEqual, &done);
   5646       if (cls.IsRegister()) {
   5647         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5648       } else {
   5649         DCHECK(cls.IsStackSlot()) << cls;
   5650         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5651       }
   5652       __ j(kNotEqual, &loop);
   5653       __ movl(out, Immediate(1));
   5654       if (zero.IsLinked()) {
   5655         __ jmp(&done);
   5656       }
   5657       break;
   5658     }
   5659 
   5660     case TypeCheckKind::kClassHierarchyCheck: {
   5661       // Walk over the class hierarchy to find a match.
   5662       NearLabel loop, success;
   5663       __ Bind(&loop);
   5664       if (cls.IsRegister()) {
   5665         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5666       } else {
   5667         DCHECK(cls.IsStackSlot()) << cls;
   5668         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5669       }
   5670       __ j(kEqual, &success);
   5671       // /* HeapReference<Class> */ out = out->super_class_
   5672       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
   5673       __ testl(out, out);
   5674       __ j(kNotEqual, &loop);
   5675       // If `out` is null, we use it for the result, and jump to `done`.
   5676       __ jmp(&done);
   5677       __ Bind(&success);
   5678       __ movl(out, Immediate(1));
   5679       if (zero.IsLinked()) {
   5680         __ jmp(&done);
   5681       }
   5682       break;
   5683     }
   5684 
   5685     case TypeCheckKind::kArrayObjectCheck: {
   5686       // Do an exact check.
   5687       NearLabel exact_check;
   5688       if (cls.IsRegister()) {
   5689         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5690       } else {
   5691         DCHECK(cls.IsStackSlot()) << cls;
   5692         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5693       }
   5694       __ j(kEqual, &exact_check);
   5695       // Otherwise, we need to check that the object's class is a non-primitive array.
   5696       // /* HeapReference<Class> */ out = out->component_type_
   5697       GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
   5698       __ testl(out, out);
   5699       // If `out` is null, we use it for the result, and jump to `done`.
   5700       __ j(kEqual, &done);
   5701       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
   5702       __ j(kNotEqual, &zero);
   5703       __ Bind(&exact_check);
   5704       __ movl(out, Immediate(1));
   5705       __ jmp(&done);
   5706       break;
   5707     }
   5708 
   5709     case TypeCheckKind::kArrayCheck: {
   5710       if (cls.IsRegister()) {
   5711         __ cmpl(out, cls.AsRegister<CpuRegister>());
   5712       } else {
   5713         DCHECK(cls.IsStackSlot()) << cls;
   5714         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5715       }
   5716       DCHECK(locations->OnlyCallsOnSlowPath());
   5717       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
   5718                                                                        /* is_fatal */ false);
   5719       codegen_->AddSlowPath(slow_path);
   5720       __ j(kNotEqual, slow_path->GetEntryLabel());
   5721       __ movl(out, Immediate(1));
   5722       if (zero.IsLinked()) {
   5723         __ jmp(&done);
   5724       }
   5725       break;
   5726     }
   5727 
   5728     case TypeCheckKind::kUnresolvedCheck:
   5729     case TypeCheckKind::kInterfaceCheck: {
   5730       // Note that we indeed only call on slow path, but we always go
   5731       // into the slow path for the unresolved and interface check
   5732       // cases.
   5733       //
   5734       // We cannot directly call the InstanceofNonTrivial runtime
   5735       // entry point without resorting to a type checking slow path
   5736       // here (i.e. by calling InvokeRuntime directly), as it would
    5737       // require assigning fixed registers for the inputs of this
   5738       // HInstanceOf instruction (following the runtime calling
   5739       // convention), which might be cluttered by the potential first
   5740       // read barrier emission at the beginning of this method.
   5741       //
   5742       // TODO: Introduce a new runtime entry point taking the object
   5743       // to test (instead of its class) as argument, and let it deal
   5744       // with the read barrier issues. This will let us refactor this
   5745       // case of the `switch` code as it was previously (with a direct
   5746       // call to the runtime not using a type checking slow path).
   5747       // This should also be beneficial for the other cases above.
   5748       DCHECK(locations->OnlyCallsOnSlowPath());
   5749       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
   5750                                                                        /* is_fatal */ false);
   5751       codegen_->AddSlowPath(slow_path);
   5752       __ jmp(slow_path->GetEntryLabel());
   5753       if (zero.IsLinked()) {
   5754         __ jmp(&done);
   5755       }
   5756       break;
   5757     }
   5758   }
   5759 
   5760   if (zero.IsLinked()) {
   5761     __ Bind(&zero);
   5762     __ xorl(out, out);
   5763   }
   5764 
   5765   if (done.IsLinked()) {
   5766     __ Bind(&done);
   5767   }
   5768 
   5769   if (slow_path != nullptr) {
   5770     __ Bind(slow_path->GetExitLabel());
   5771   }
   5772 }
   5773 
   5774 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
   5775   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   5776   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
   5777   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   5778   switch (type_check_kind) {
   5779     case TypeCheckKind::kExactCheck:
   5780     case TypeCheckKind::kAbstractClassCheck:
   5781     case TypeCheckKind::kClassHierarchyCheck:
   5782     case TypeCheckKind::kArrayObjectCheck:
   5783       call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
   5784           LocationSummary::kCallOnSlowPath :
   5785           LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
   5786       break;
   5787     case TypeCheckKind::kArrayCheck:
   5788     case TypeCheckKind::kUnresolvedCheck:
   5789     case TypeCheckKind::kInterfaceCheck:
   5790       call_kind = LocationSummary::kCallOnSlowPath;
   5791       break;
   5792   }
   5793   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   5794   locations->SetInAt(0, Location::RequiresRegister());
   5795   locations->SetInAt(1, Location::Any());
   5796   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
   5797   locations->AddTemp(Location::RequiresRegister());
   5798   // When read barriers are enabled, we need an additional temporary
   5799   // register for some cases.
   5800   if (TypeCheckNeedsATemporary(type_check_kind)) {
   5801     locations->AddTemp(Location::RequiresRegister());
   5802   }
   5803 }
   5804 
   5805 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
   5806   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   5807   LocationSummary* locations = instruction->GetLocations();
   5808   Location obj_loc = locations->InAt(0);
   5809   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   5810   Location cls = locations->InAt(1);
   5811   Location temp_loc = locations->GetTemp(0);
   5812   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
   5813   Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
   5814       locations->GetTemp(1) :
   5815       Location::NoLocation();
   5816   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   5817   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   5818   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   5819   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   5820 
   5821   bool is_type_check_slow_path_fatal =
   5822       (type_check_kind == TypeCheckKind::kExactCheck ||
   5823        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
   5824        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
   5825        type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
   5826       !instruction->CanThrowIntoCatchBlock();
   5827   SlowPathCode* type_check_slow_path =
   5828       new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
   5829                                                            is_type_check_slow_path_fatal);
   5830   codegen_->AddSlowPath(type_check_slow_path);
   5831 
   5832   switch (type_check_kind) {
   5833     case TypeCheckKind::kExactCheck:
   5834     case TypeCheckKind::kArrayCheck: {
   5835       NearLabel done;
   5836       // Avoid null check if we know obj is not null.
   5837       if (instruction->MustDoNullCheck()) {
   5838         __ testl(obj, obj);
   5839         __ j(kEqual, &done);
   5840       }
   5841 
   5842       // /* HeapReference<Class> */ temp = obj->klass_
   5843       GenerateReferenceLoadTwoRegisters(
   5844           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   5845 
   5846       if (cls.IsRegister()) {
   5847         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   5848       } else {
   5849         DCHECK(cls.IsStackSlot()) << cls;
   5850         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5851       }
   5852       // Jump to slow path for throwing the exception or doing a
   5853       // more involved array check.
   5854       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
   5855       __ Bind(&done);
   5856       break;
   5857     }
   5858 
   5859     case TypeCheckKind::kAbstractClassCheck: {
   5860       NearLabel done;
   5861       // Avoid null check if we know obj is not null.
   5862       if (instruction->MustDoNullCheck()) {
   5863         __ testl(obj, obj);
   5864         __ j(kEqual, &done);
   5865       }
   5866 
   5867       // /* HeapReference<Class> */ temp = obj->klass_
   5868       GenerateReferenceLoadTwoRegisters(
   5869           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   5870 
   5871       // If the class is abstract, we eagerly fetch the super class of the
   5872       // object to avoid doing a comparison we know will fail.
   5873       NearLabel loop, compare_classes;
   5874       __ Bind(&loop);
   5875       // /* HeapReference<Class> */ temp = temp->super_class_
   5876       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
   5877 
   5878       // If the class reference currently in `temp` is not null, jump
   5879       // to the `compare_classes` label to compare it with the checked
   5880       // class.
   5881       __ testl(temp, temp);
   5882       __ j(kNotEqual, &compare_classes);
   5883       // Otherwise, jump to the slow path to throw the exception.
   5884       //
   5885       // But before, move back the object's class into `temp` before
   5886       // going into the slow path, as it has been overwritten in the
   5887       // meantime.
   5888       // /* HeapReference<Class> */ temp = obj->klass_
   5889       GenerateReferenceLoadTwoRegisters(
   5890           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   5891       __ jmp(type_check_slow_path->GetEntryLabel());
   5892 
   5893       __ Bind(&compare_classes);
   5894       if (cls.IsRegister()) {
   5895         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   5896       } else {
   5897         DCHECK(cls.IsStackSlot()) << cls;
   5898         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5899       }
   5900       __ j(kNotEqual, &loop);
   5901       __ Bind(&done);
   5902       break;
   5903     }
   5904 
   5905     case TypeCheckKind::kClassHierarchyCheck: {
   5906       NearLabel done;
   5907       // Avoid null check if we know obj is not null.
   5908       if (instruction->MustDoNullCheck()) {
   5909         __ testl(obj, obj);
   5910         __ j(kEqual, &done);
   5911       }
   5912 
   5913       // /* HeapReference<Class> */ temp = obj->klass_
   5914       GenerateReferenceLoadTwoRegisters(
   5915           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   5916 
   5917       // Walk over the class hierarchy to find a match.
   5918       NearLabel loop;
   5919       __ Bind(&loop);
   5920       if (cls.IsRegister()) {
   5921         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   5922       } else {
   5923         DCHECK(cls.IsStackSlot()) << cls;
   5924         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5925       }
   5926       __ j(kEqual, &done);
   5927 
   5928       // /* HeapReference<Class> */ temp = temp->super_class_
   5929       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
   5930 
   5931       // If the class reference currently in `temp` is not null, jump
    5932       // back to the beginning of the loop.
   5933       __ testl(temp, temp);
   5934       __ j(kNotEqual, &loop);
   5935       // Otherwise, jump to the slow path to throw the exception.
   5936       //
   5937       // But before, move back the object's class into `temp` before
   5938       // going into the slow path, as it has been overwritten in the
   5939       // meantime.
   5940       // /* HeapReference<Class> */ temp = obj->klass_
   5941       GenerateReferenceLoadTwoRegisters(
   5942           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   5943       __ jmp(type_check_slow_path->GetEntryLabel());
   5944       __ Bind(&done);
   5945       break;
   5946     }
   5947 
   5948     case TypeCheckKind::kArrayObjectCheck: {
   5949       // We cannot use a NearLabel here, as its range might be too
   5950       // short in some cases when read barriers are enabled.  This has
   5951       // been observed for instance when the code emitted for this
   5952       // case uses high x86-64 registers (R8-R15).
   5953       Label done;
   5954       // Avoid null check if we know obj is not null.
   5955       if (instruction->MustDoNullCheck()) {
   5956         __ testl(obj, obj);
   5957         __ j(kEqual, &done);
   5958       }
   5959 
   5960       // /* HeapReference<Class> */ temp = obj->klass_
   5961       GenerateReferenceLoadTwoRegisters(
   5962           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   5963 
   5964       // Do an exact check.
   5965       NearLabel check_non_primitive_component_type;
   5966       if (cls.IsRegister()) {
   5967         __ cmpl(temp, cls.AsRegister<CpuRegister>());
   5968       } else {
   5969         DCHECK(cls.IsStackSlot()) << cls;
   5970         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   5971       }
   5972       __ j(kEqual, &done);
   5973 
   5974       // Otherwise, we need to check that the object's class is a non-primitive array.
   5975       // /* HeapReference<Class> */ temp = temp->component_type_
   5976       GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
   5977 
   5978       // If the component type is not null (i.e. the object is indeed
   5979       // an array), jump to label `check_non_primitive_component_type`
   5980       // to further check that this component type is not a primitive
   5981       // type.
   5982       __ testl(temp, temp);
   5983       __ j(kNotEqual, &check_non_primitive_component_type);
   5984       // Otherwise, jump to the slow path to throw the exception.
   5985       //
   5986       // But before, move back the object's class into `temp` before
   5987       // going into the slow path, as it has been overwritten in the
   5988       // meantime.
   5989       // /* HeapReference<Class> */ temp = obj->klass_
   5990       GenerateReferenceLoadTwoRegisters(
   5991           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   5992       __ jmp(type_check_slow_path->GetEntryLabel());
   5993 
   5994       __ Bind(&check_non_primitive_component_type);
   5995       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
   5996       __ j(kEqual, &done);
   5997       // Same comment as above regarding `temp` and the slow path.
   5998       // /* HeapReference<Class> */ temp = obj->klass_
   5999       GenerateReferenceLoadTwoRegisters(
   6000           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   6001       __ jmp(type_check_slow_path->GetEntryLabel());
   6002       __ Bind(&done);
   6003       break;
   6004     }
   6005 
   6006     case TypeCheckKind::kUnresolvedCheck:
   6007     case TypeCheckKind::kInterfaceCheck:
   6008       NearLabel done;
   6009       // Avoid null check if we know obj is not null.
   6010       if (instruction->MustDoNullCheck()) {
   6011         __ testl(obj, obj);
   6012         __ j(kEqual, &done);
   6013       }
   6014 
   6015       // /* HeapReference<Class> */ temp = obj->klass_
   6016       GenerateReferenceLoadTwoRegisters(
   6017           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
   6018 
   6019       // We always go into the type check slow path for the unresolved
   6020       // and interface check cases.
   6021       //
   6022       // We cannot directly call the CheckCast runtime entry point
   6023       // without resorting to a type checking slow path here (i.e. by
    6024       // calling InvokeRuntime directly), as it would require
    6025       // assigning fixed registers for the inputs of this HCheckCast
   6026       // instruction (following the runtime calling convention), which
   6027       // might be cluttered by the potential first read barrier
   6028       // emission at the beginning of this method.
   6029       //
   6030       // TODO: Introduce a new runtime entry point taking the object
   6031       // to test (instead of its class) as argument, and let it deal
    6032       // with the read barrier issues. This will let us refactor this
    6033       // case of the `switch` code back to what it was previously (a direct
    6034       // call to the runtime that does not use a type checking slow path).
   6035       // This should also be beneficial for the other cases above.
   6036       __ jmp(type_check_slow_path->GetEntryLabel());
   6037       __ Bind(&done);
   6038       break;
   6039   }
   6040 
   6041   __ Bind(type_check_slow_path->GetExitLabel());
   6042 }
   6043 
   6044 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   6045   LocationSummary* locations =
   6046       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   6047   InvokeRuntimeCallingConvention calling_convention;
   6048   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   6049 }
   6050 
   6051 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   6052   codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
   6053                                                  : QUICK_ENTRY_POINT(pUnlockObject),
   6054                           instruction,
   6055                           instruction->GetDexPc(),
   6056                           nullptr);
   6057   if (instruction->IsEnter()) {
   6058     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
   6059   } else {
   6060     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   6061   }
   6062 }
   6063 
   6064 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
   6065 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
   6066 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
   6067 
   6068 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
   6069   LocationSummary* locations =
   6070       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   6071   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
   6072          || instruction->GetResultType() == Primitive::kPrimLong);
   6073   locations->SetInAt(0, Location::RequiresRegister());
   6074   locations->SetInAt(1, Location::Any());
   6075   locations->SetOut(Location::SameAsFirstInput());
   6076 }
   6077 
   6078 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
   6079   HandleBitwiseOperation(instruction);
   6080 }
   6081 
   6082 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
   6083   HandleBitwiseOperation(instruction);
   6084 }
   6085 
   6086 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
   6087   HandleBitwiseOperation(instruction);
   6088 }
   6089 
   6090 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
   6091   LocationSummary* locations = instruction->GetLocations();
   6092   Location first = locations->InAt(0);
   6093   Location second = locations->InAt(1);
   6094   DCHECK(first.Equals(locations->Out()));
   6095 
   6096   if (instruction->GetResultType() == Primitive::kPrimInt) {
   6097     if (second.IsRegister()) {
   6098       if (instruction->IsAnd()) {
   6099         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6100       } else if (instruction->IsOr()) {
   6101         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6102       } else {
   6103         DCHECK(instruction->IsXor());
   6104         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
   6105       }
   6106     } else if (second.IsConstant()) {
   6107       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
   6108       if (instruction->IsAnd()) {
   6109         __ andl(first.AsRegister<CpuRegister>(), imm);
   6110       } else if (instruction->IsOr()) {
   6111         __ orl(first.AsRegister<CpuRegister>(), imm);
   6112       } else {
   6113         DCHECK(instruction->IsXor());
   6114         __ xorl(first.AsRegister<CpuRegister>(), imm);
   6115       }
   6116     } else {
   6117       Address address(CpuRegister(RSP), second.GetStackIndex());
   6118       if (instruction->IsAnd()) {
   6119         __ andl(first.AsRegister<CpuRegister>(), address);
   6120       } else if (instruction->IsOr()) {
   6121         __ orl(first.AsRegister<CpuRegister>(), address);
   6122       } else {
   6123         DCHECK(instruction->IsXor());
   6124         __ xorl(first.AsRegister<CpuRegister>(), address);
   6125       }
   6126     }
   6127   } else {
   6128     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
   6129     CpuRegister first_reg = first.AsRegister<CpuRegister>();
   6130     bool second_is_constant = false;
   6131     int64_t value = 0;
   6132     if (second.IsConstant()) {
   6133       second_is_constant = true;
   6134       value = second.GetConstant()->AsLongConstant()->GetValue();
   6135     }
   6136     bool is_int32_value = IsInt<32>(value);
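             // x86-64 and/or/xor on a 64-bit register only accept a sign-extended
             // 32-bit immediate, so wider constants are read from the RIP-addressed
             // constant area (LiteralInt64Address) instead.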
   6137 
   6138     if (instruction->IsAnd()) {
   6139       if (second_is_constant) {
   6140         if (is_int32_value) {
   6141           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
   6142         } else {
   6143           __ andq(first_reg, codegen_->LiteralInt64Address(value));
   6144         }
   6145       } else if (second.IsDoubleStackSlot()) {
   6146         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6147       } else {
   6148         __ andq(first_reg, second.AsRegister<CpuRegister>());
   6149       }
   6150     } else if (instruction->IsOr()) {
   6151       if (second_is_constant) {
   6152         if (is_int32_value) {
   6153           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
   6154         } else {
   6155           __ orq(first_reg, codegen_->LiteralInt64Address(value));
   6156         }
   6157       } else if (second.IsDoubleStackSlot()) {
   6158         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6159       } else {
   6160         __ orq(first_reg, second.AsRegister<CpuRegister>());
   6161       }
   6162     } else {
   6163       DCHECK(instruction->IsXor());
   6164       if (second_is_constant) {
   6165         if (is_int32_value) {
   6166           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
   6167         } else {
   6168           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
   6169         }
   6170       } else if (second.IsDoubleStackSlot()) {
   6171         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
   6172       } else {
   6173         __ xorq(first_reg, second.AsRegister<CpuRegister>());
   6174       }
   6175     }
   6176   }
   6177 }
   6178 
   6179 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
   6180                                                                       Location out,
   6181                                                                       uint32_t offset,
   6182                                                                       Location maybe_temp) {
   6183   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   6184   if (kEmitCompilerReadBarrier) {
   6185     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
   6186     if (kUseBakerReadBarrier) {
   6187       // Load with fast path based Baker's read barrier.
   6188       // /* HeapReference<Object> */ out = *(out + offset)
   6189       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   6190           instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
   6191     } else {
   6192       // Load with slow path based read barrier.
   6193       // Save the value of `out` into `maybe_temp` before overwriting it
   6194       // in the following move operation, as we will need it for the
   6195       // read barrier below.
   6196       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
   6197       // /* HeapReference<Object> */ out = *(out + offset)
   6198       __ movl(out_reg, Address(out_reg, offset));
   6199       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
   6200     }
   6201   } else {
   6202     // Plain load with no read barrier.
   6203     // /* HeapReference<Object> */ out = *(out + offset)
   6204     __ movl(out_reg, Address(out_reg, offset));
   6205     __ MaybeUnpoisonHeapReference(out_reg);
   6206   }
   6207 }
   6208 
   6209 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
   6210                                                                        Location out,
   6211                                                                        Location obj,
   6212                                                                        uint32_t offset,
   6213                                                                        Location maybe_temp) {
   6214   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   6215   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
   6216   if (kEmitCompilerReadBarrier) {
   6217     if (kUseBakerReadBarrier) {
   6218       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
   6219       // Load with fast path based Baker's read barrier.
   6220       // /* HeapReference<Object> */ out = *(obj + offset)
   6221       codegen_->GenerateFieldLoadWithBakerReadBarrier(
   6222           instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
   6223     } else {
   6224       // Load with slow path based read barrier.
   6225       // /* HeapReference<Object> */ out = *(obj + offset)
   6226       __ movl(out_reg, Address(obj_reg, offset));
   6227       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
   6228     }
   6229   } else {
   6230     // Plain load with no read barrier.
   6231     // /* HeapReference<Object> */ out = *(obj + offset)
   6232     __ movl(out_reg, Address(obj_reg, offset));
   6233     __ MaybeUnpoisonHeapReference(out_reg);
   6234   }
   6235 }
   6236 
   6237 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
   6238                                                              Location root,
   6239                                                              const Address& address,
   6240                                                              Label* fixup_label) {
   6241   CpuRegister root_reg = root.AsRegister<CpuRegister>();
   6242   if (kEmitCompilerReadBarrier) {
   6243     if (kUseBakerReadBarrier) {
   6244       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
    6245       // Baker's read barriers are used:
   6246       //
   6247       //   root = *address;
   6248       //   if (Thread::Current()->GetIsGcMarking()) {
   6249       //     root = ReadBarrier::Mark(root)
   6250       //   }
   6251 
   6252       // /* GcRoot<mirror::Object> */ root = *address
   6253       __ movl(root_reg, address);
   6254       if (fixup_label != nullptr) {
   6255         __ Bind(fixup_label);
   6256       }
   6257       static_assert(
   6258           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
   6259           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
   6260           "have different sizes.");
   6261       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
   6262                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
   6263                     "have different sizes.");
   6264 
   6265       // Slow path used to mark the GC root `root`.
   6266       SlowPathCode* slow_path =
   6267           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
   6268       codegen_->AddSlowPath(slow_path);
   6269 
   6270       __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
   6271                                       /* no_rip */ true),
   6272                     Immediate(0));
   6273       __ j(kNotEqual, slow_path->GetEntryLabel());
   6274       __ Bind(slow_path->GetExitLabel());
   6275     } else {
   6276       // GC root loaded through a slow path for read barriers other
   6277       // than Baker's.
   6278       // /* GcRoot<mirror::Object>* */ root = address
   6279       __ leaq(root_reg, address);
   6280       if (fixup_label != nullptr) {
   6281         __ Bind(fixup_label);
   6282       }
   6283       // /* mirror::Object* */ root = root->Read()
   6284       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
   6285     }
   6286   } else {
   6287     // Plain GC root load with no read barrier.
   6288     // /* GcRoot<mirror::Object> */ root = *address
   6289     __ movl(root_reg, address);
   6290     if (fixup_label != nullptr) {
   6291       __ Bind(fixup_label);
   6292     }
   6293     // Note that GC roots are not affected by heap poisoning, thus we
   6294     // do not have to unpoison `root_reg` here.
   6295   }
   6296 }
   6297 
   6298 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
   6299                                                                 Location ref,
   6300                                                                 CpuRegister obj,
   6301                                                                 uint32_t offset,
   6302                                                                 Location temp,
   6303                                                                 bool needs_null_check) {
   6304   DCHECK(kEmitCompilerReadBarrier);
   6305   DCHECK(kUseBakerReadBarrier);
   6306 
   6307   // /* HeapReference<Object> */ ref = *(obj + offset)
   6308   Address src(obj, offset);
   6309   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
   6310 }
   6311 
   6312 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
   6313                                                                 Location ref,
   6314                                                                 CpuRegister obj,
   6315                                                                 uint32_t data_offset,
   6316                                                                 Location index,
   6317                                                                 Location temp,
   6318                                                                 bool needs_null_check) {
   6319   DCHECK(kEmitCompilerReadBarrier);
   6320   DCHECK(kUseBakerReadBarrier);
   6321 
   6322   // /* HeapReference<Object> */ ref =
   6323   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
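         // Note: TIMES_4 is the scale factor for 4-byte elements; used as a shift
         // amount it multiplies the constant index by 4, i.e. by
         // sizeof(HeapReference<Object>).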
   6324   Address src = index.IsConstant() ?
   6325       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
   6326       Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
   6327   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
   6328 }
   6329 
   6330 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
   6331                                                                     Location ref,
   6332                                                                     CpuRegister obj,
   6333                                                                     const Address& src,
   6334                                                                     Location temp,
   6335                                                                     bool needs_null_check) {
   6336   DCHECK(kEmitCompilerReadBarrier);
   6337   DCHECK(kUseBakerReadBarrier);
   6338 
   6339   // In slow path based read barriers, the read barrier call is
   6340   // inserted after the original load. However, in fast path based
   6341   // Baker's read barriers, we need to perform the load of
   6342   // mirror::Object::monitor_ *before* the original reference load.
   6343   // This load-load ordering is required by the read barrier.
   6344   // The fast path/slow path (for Baker's algorithm) should look like:
   6345   //
   6346   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   6347   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   6348   //   HeapReference<Object> ref = *src;  // Original reference load.
   6349   //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
   6350   //   if (is_gray) {
   6351   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   6352   //   }
   6353   //
   6354   // Note: the original implementation in ReadBarrier::Barrier is
   6355   // slightly more complex as:
   6356   // - it implements the load-load fence using a data dependency on
   6357   //   the high-bits of rb_state, which are expected to be all zeroes
   6358   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
   6359   //   here, which is a no-op thanks to the x86-64 memory model);
   6360   // - it performs additional checks that we do not do here for
   6361   //   performance reasons.
   6362 
   6363   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
   6364   CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
   6365   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
   6366 
   6367   // /* int32_t */ monitor = obj->monitor_
   6368   __ movl(temp_reg, Address(obj, monitor_offset));
   6369   if (needs_null_check) {
   6370     MaybeRecordImplicitNullCheck(instruction);
   6371   }
   6372   // /* LockWord */ lock_word = LockWord(monitor)
   6373   static_assert(sizeof(LockWord) == sizeof(int32_t),
   6374                 "art::LockWord and int32_t have different sizes.");
   6375   // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
   6376   __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
   6377   __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
   6378   static_assert(
   6379       LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
   6380       "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
   6381 
   6382   // Load fence to prevent load-load reordering.
   6383   // Note that this is a no-op, thanks to the x86-64 memory model.
   6384   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   6385 
   6386   // The actual reference load.
   6387   // /* HeapReference<Object> */ ref = *src
   6388   __ movl(ref_reg, src);
   6389 
   6390   // Object* ref = ref_addr->AsMirrorPtr()
   6391   __ MaybeUnpoisonHeapReference(ref_reg);
   6392 
   6393   // Slow path used to mark the object `ref` when it is gray.
   6394   SlowPathCode* slow_path =
   6395       new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
   6396   AddSlowPath(slow_path);
   6397 
   6398   // if (rb_state == ReadBarrier::gray_ptr_)
   6399   //   ref = ReadBarrier::Mark(ref);
   6400   __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
   6401   __ j(kEqual, slow_path->GetEntryLabel());
   6402   __ Bind(slow_path->GetExitLabel());
   6403 }
   6404 
   6405 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
   6406                                                   Location out,
   6407                                                   Location ref,
   6408                                                   Location obj,
   6409                                                   uint32_t offset,
   6410                                                   Location index) {
   6411   DCHECK(kEmitCompilerReadBarrier);
   6412 
   6413   // Insert a slow path based read barrier *after* the reference load.
   6414   //
   6415   // If heap poisoning is enabled, the unpoisoning of the loaded
   6416   // reference will be carried out by the runtime within the slow
   6417   // path.
   6418   //
   6419   // Note that `ref` currently does not get unpoisoned (when heap
   6420   // poisoning is enabled), which is alright as the `ref` argument is
   6421   // not used by the artReadBarrierSlow entry point.
   6422   //
   6423   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
   6424   SlowPathCode* slow_path = new (GetGraph()->GetArena())
   6425       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
   6426   AddSlowPath(slow_path);
   6427 
   6428   __ jmp(slow_path->GetEntryLabel());
   6429   __ Bind(slow_path->GetExitLabel());
   6430 }
   6431 
   6432 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
   6433                                                        Location out,
   6434                                                        Location ref,
   6435                                                        Location obj,
   6436                                                        uint32_t offset,
   6437                                                        Location index) {
   6438   if (kEmitCompilerReadBarrier) {
   6439     // Baker's read barriers shall be handled by the fast path
   6440     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
   6441     DCHECK(!kUseBakerReadBarrier);
   6442     // If heap poisoning is enabled, unpoisoning will be taken care of
   6443     // by the runtime within the slow path.
   6444     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   6445   } else if (kPoisonHeapReferences) {
   6446     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
   6447   }
   6448 }
   6449 
   6450 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
   6451                                                          Location out,
   6452                                                          Location root) {
   6453   DCHECK(kEmitCompilerReadBarrier);
   6454 
   6455   // Insert a slow path based read barrier *after* the GC root load.
   6456   //
   6457   // Note that GC roots are not affected by heap poisoning, so we do
   6458   // not need to do anything special for this here.
   6459   SlowPathCode* slow_path =
   6460       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
   6461   AddSlowPath(slow_path);
   6462 
   6463   __ jmp(slow_path->GetEntryLabel());
   6464   __ Bind(slow_path->GetExitLabel());
   6465 }
   6466 
   6467 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   6468   // Nothing to do, this should be removed during prepare for register allocator.
   6469   LOG(FATAL) << "Unreachable";
   6470 }
   6471 
   6472 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   6473   // Nothing to do, this should be removed during prepare for register allocator.
   6474   LOG(FATAL) << "Unreachable";
   6475 }
   6476 
   6477 // Simple implementation of packed switch - generate cascaded compare/jumps.
   6478 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   6479   LocationSummary* locations =
   6480       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   6481   locations->SetInAt(0, Location::RequiresRegister());
   6482   locations->AddTemp(Location::RequiresRegister());
   6483   locations->AddTemp(Location::RequiresRegister());
   6484 }
   6485 
   6486 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   6487   int32_t lower_bound = switch_instr->GetStartValue();
   6488   uint32_t num_entries = switch_instr->GetNumEntries();
   6489   LocationSummary* locations = switch_instr->GetLocations();
   6490   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   6491   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   6492   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
   6493   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   6494 
   6495   // Should we generate smaller inline compare/jumps?
   6496   if (num_entries <= kPackedSwitchJumpTableThreshold) {
   6497     // Figure out the correct compare values and jump conditions.
   6498     // Handle the first compare/branch as a special case because it might
   6499     // jump to the default case.
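             //
             // Illustrative example (hypothetical values): with lower_bound == 1 and
             // num_entries == 3, this emits roughly
             //   cmpl value, 1 ; jl default ; je successors[0]
             //   cmpl value, 3 ; jl successors[1] ; je successors[2]
             //   jmp default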
   6500     DCHECK_GT(num_entries, 2u);
   6501     Condition first_condition;
   6502     uint32_t index;
   6503     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   6504     if (lower_bound != 0) {
   6505       first_condition = kLess;
   6506       __ cmpl(value_reg_in, Immediate(lower_bound));
   6507       __ j(first_condition, codegen_->GetLabelOf(default_block));
   6508       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
   6509 
   6510       index = 1;
   6511     } else {
   6512       // Handle all the compare/jumps below.
   6513       first_condition = kBelow;
   6514       index = 0;
   6515     }
   6516 
   6517     // Handle the rest of the compare/jumps.
   6518     for (; index + 1 < num_entries; index += 2) {
   6519       int32_t compare_to_value = lower_bound + index + 1;
   6520       __ cmpl(value_reg_in, Immediate(compare_to_value));
   6521       // Jump to successors[index] if value < case_value[index].
   6522       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
   6523       // Jump to successors[index + 1] if value == case_value[index + 1].
   6524       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
   6525     }
   6526 
   6527     if (index != num_entries) {
    6528       // There is an odd number of entries. Handle the last one.
   6529       DCHECK_EQ(index + 1, num_entries);
   6530       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
   6531       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
   6532     }
   6533 
   6534     // And the default for any other value.
   6535     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
   6536       __ jmp(codegen_->GetLabelOf(default_block));
   6537     }
   6538     return;
   6539   }
   6540 
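           // Jump table path. Sketch of the emitted sequence (illustrative only):
           //   leal   value, [value - lower_bound]     ; only if lower_bound != 0
           //   cmpl   value, num_entries - 1
           //   ja     default_block
           //   leaq   base, [rip + jump_table]
           //   movsxd offset, [base + value * 4]
           //   addq   offset, base
           //   jmp    offset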
   6541   // Remove the bias, if needed.
   6542   Register value_reg_out = value_reg_in.AsRegister();
   6543   if (lower_bound != 0) {
   6544     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
   6545     value_reg_out = temp_reg.AsRegister();
   6546   }
   6547   CpuRegister value_reg(value_reg_out);
   6548 
   6549   // Is the value in range?
   6550   __ cmpl(value_reg, Immediate(num_entries - 1));
   6551   __ j(kAbove, codegen_->GetLabelOf(default_block));
   6552 
   6553   // We are in the range of the table.
   6554   // Load the address of the jump table in the constant area.
   6555   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
   6556 
   6557   // Load the (signed) offset from the jump table.
   6558   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
   6559 
   6560   // Add the offset to the address of the table base.
   6561   __ addq(temp_reg, base_reg);
   6562 
   6563   // And jump.
   6564   __ jmp(temp_reg);
   6565 }
   6566 
   6567 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
   6568   if (value == 0) {
   6569     __ xorl(dest, dest);
   6570   } else {
   6571     __ movl(dest, Immediate(value));
   6572   }
   6573 }
   6574 
   6575 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
   6576   if (value == 0) {
   6577     // Clears upper bits too.
   6578     __ xorl(dest, dest);
   6579   } else if (IsUint<32>(value)) {
   6580     // We can use a 32 bit move, as it will zero-extend and is shorter.
   6581     __ movl(dest, Immediate(static_cast<int32_t>(value)));
   6582   } else {
   6583     __ movq(dest, Immediate(value));
   6584   }
   6585 }
   6586 
   6587 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
   6588   if (value == 0) {
   6589     __ xorps(dest, dest);
   6590   } else {
   6591     __ movss(dest, LiteralInt32Address(value));
   6592   }
   6593 }
   6594 
   6595 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
   6596   if (value == 0) {
   6597     __ xorpd(dest, dest);
   6598   } else {
   6599     __ movsd(dest, LiteralInt64Address(value));
   6600   }
   6601 }
   6602 
   6603 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
   6604   Load32BitValue(dest, bit_cast<int32_t, float>(value));
   6605 }
   6606 
   6607 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
   6608   Load64BitValue(dest, bit_cast<int64_t, double>(value));
   6609 }
   6610 
   6611 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
   6612   if (value == 0) {
   6613     __ testl(dest, dest);
   6614   } else {
   6615     __ cmpl(dest, Immediate(value));
   6616   }
   6617 }
   6618 
   6619 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
   6620   if (IsInt<32>(value)) {
   6621     if (value == 0) {
   6622       __ testq(dest, dest);
   6623     } else {
   6624       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
   6625     }
   6626   } else {
    6627     // Value won't fit in a 32-bit immediate.
   6628     __ cmpq(dest, LiteralInt64Address(value));
   6629   }
   6630 }
   6631 
   6632 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
   6633   DCHECK(dest.IsDoubleStackSlot());
   6634   if (IsInt<32>(value)) {
   6635     // Can move directly as an int32 constant.
   6636     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
   6637             Immediate(static_cast<int32_t>(value)));
   6638   } else {
   6639     Load64BitValue(CpuRegister(TMP), value);
   6640     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
   6641   }
   6642 }
   6643 
   6644 /**
   6645  * Class to handle late fixup of offsets into constant area.
   6646  */
   6647 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
   6648  public:
   6649   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
   6650       : codegen_(&codegen), offset_into_constant_area_(offset) {}
   6651 
   6652  protected:
   6653   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
   6654 
   6655   CodeGeneratorX86_64* codegen_;
   6656 
   6657  private:
   6658   void Process(const MemoryRegion& region, int pos) OVERRIDE {
   6659     // Patch the correct offset for the instruction.  We use the address of the
   6660     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
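             //
             // Worked example with hypothetical numbers: if the constant sits at
             // constant_offset == 0x400 and the displacement field ends at
             // pos == 0x100, then the four bytes at 0x0FC receive 0x300; at run
             // time, RIP (the next-instruction address, 0x100) + 0x300 == 0x400.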
   6661     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
   6662     int32_t relative_position = constant_offset - pos;
   6663 
   6664     // Patch in the right value.
   6665     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
   6666   }
   6667 
   6668   // Location in constant area that the fixup refers to.
   6669   size_t offset_into_constant_area_;
   6670 };
   6671 
   6672 /**
    6673  * Class to handle late fixup of offsets to a jump table that will be created in the
   6674  * constant area.
   6675  */
   6676 class JumpTableRIPFixup : public RIPFixup {
   6677  public:
   6678   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
   6679       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
   6680 
   6681   void CreateJumpTable() {
   6682     X86_64Assembler* assembler = codegen_->GetAssembler();
   6683 
   6684     // Ensure that the reference to the jump table has the correct offset.
   6685     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
   6686     SetOffset(offset_in_constant_table);
   6687 
   6688     // Compute the offset from the start of the function to this jump table.
   6689     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
   6690 
    6691     // Populate the jump table with the offsets to the target blocks.
   6692     int32_t num_entries = switch_instr_->GetNumEntries();
   6693     HBasicBlock* block = switch_instr_->GetBlock();
   6694     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
   6695     // The value that we want is the target offset - the position of the table.
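             // (The sequence emitted in VisitPackedSwitch adds this offset back to
             // the table's address, via movsxd/addq/jmp, to recover the absolute
             // target.)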
   6696     for (int32_t i = 0; i < num_entries; i++) {
   6697       HBasicBlock* b = successors[i];
   6698       Label* l = codegen_->GetLabelOf(b);
   6699       DCHECK(l->IsBound());
   6700       int32_t offset_to_block = l->Position() - current_table_offset;
   6701       assembler->AppendInt32(offset_to_block);
   6702     }
   6703   }
   6704 
   6705  private:
   6706   const HPackedSwitch* switch_instr_;
   6707 };
   6708 
   6709 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   6710   // Generate the constant area if needed.
   6711   X86_64Assembler* assembler = GetAssembler();
   6712   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
   6713     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
   6714     assembler->Align(4, 0);
   6715     constant_area_start_ = assembler->CodeSize();
   6716 
   6717     // Populate any jump tables.
   6718     for (auto jump_table : fixups_to_jump_tables_) {
   6719       jump_table->CreateJumpTable();
   6720     }
   6721 
   6722     // And now add the constant area to the generated code.
   6723     assembler->AddConstantArea();
   6724   }
   6725 
   6726   // And finish up.
   6727   CodeGenerator::Finalize(allocator);
   6728 }
   6729 
   6730 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
   6731   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
   6732   return Address::RIP(fixup);
   6733 }
   6734 
   6735 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
   6736   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
   6737   return Address::RIP(fixup);
   6738 }
   6739 
   6740 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
   6741   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
   6742   return Address::RIP(fixup);
   6743 }
   6744 
   6745 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
   6746   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
   6747   return Address::RIP(fixup);
   6748 }
   6749 
   6750 // TODO: trg as memory.
   6751 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   6752   if (!trg.IsValid()) {
   6753     DCHECK_EQ(type, Primitive::kPrimVoid);
   6754     return;
   6755   }
   6756 
   6757   DCHECK_NE(type, Primitive::kPrimVoid);
   6758 
   6759   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
   6760   if (trg.Equals(return_loc)) {
   6761     return;
   6762   }
   6763 
   6764   // Let the parallel move resolver take care of all of this.
   6765   HParallelMove parallel_move(GetGraph()->GetArena());
   6766   parallel_move.AddMove(return_loc, trg, type, nullptr);
   6767   GetMoveResolver()->EmitNativeCode(&parallel_move);
   6768 }
   6769 
   6770 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
   6771   // Create a fixup to be used to create and address the jump table.
   6772   JumpTableRIPFixup* table_fixup =
   6773       new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
   6774 
    6775   // Remember the fixup so the jump table can be populated later (in Finalize).
   6776   fixups_to_jump_tables_.push_back(table_fixup);
   6777   return Address::RIP(table_fixup);
   6778 }
   6779 
   6780 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
   6781                                              const Address& addr_high,
   6782                                              int64_t v,
   6783                                              HInstruction* instruction) {
   6784   if (IsInt<32>(v)) {
   6785     int32_t v_32 = v;
   6786     __ movq(addr_low, Immediate(v_32));
   6787     MaybeRecordImplicitNullCheck(instruction);
   6788   } else {
    6789     // Didn't fit in a sign-extended 32-bit immediate.  Do it in two 32-bit pieces.
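             // For example (hypothetical value): v == 0x123456789 stores 0x23456789
             // to addr_low and 0x00000001 to addr_high.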
   6790     int32_t low_v = Low32Bits(v);
   6791     int32_t high_v = High32Bits(v);
   6792     __ movl(addr_low, Immediate(low_v));
   6793     MaybeRecordImplicitNullCheck(instruction);
   6794     __ movl(addr_high, Immediate(high_v));
   6795   }
   6796 }
   6797 
   6798 #undef __
   6799 
   6800 }  // namespace x86_64
   6801 }  // namespace art
   6802