// (code-search navigation header, commented out) Home | History | Annotate | Download | only in x86_64
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "assembler_x86_64.h"
     18 
     19 #include "base/casts.h"
     20 #include "entrypoints/quick/quick_entrypoints.h"
     21 #include "memory_region.h"
     22 #include "thread.h"
     23 
     24 namespace art {
     25 namespace x86_64 {
     26 
     27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
     28   return os << reg.AsRegister();
     29 }
     30 
     31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
     32   return os << reg.AsFloatRegister();
     33 }
     34 
     35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
     36   return os << "ST" << static_cast<int>(reg);
     37 }
     38 
// Indirect call through a register: FF /2 (optional REX.B for r8-r15).
void X86_64Assembler::call(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(2, reg.LowBits());  // /2 opcode extension.
}
     45 
     46 
// Indirect call through memory: FF /2 with a full ModRM/SIB operand.
void X86_64Assembler::call(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(2, address);  // /2 opcode extension.
}
     53 
     54 
// Direct relative call: E8 rel32. The rel32 is relative to the end of the
// instruction, hence the full 5-byte size passed to EmitLabel.
void X86_64Assembler::call(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xE8);
  static const int kSize = 5;  // Opcode (1) + rel32 (4).
  // Offset by one because we already have emitted the opcode.
  EmitLabel(label, kSize - 1);
}
     62 
// Push a 64-bit register: 50+rd (no REX.W needed; push defaults to 64-bit).
void X86_64Assembler::pushq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x50 + reg.LowBits());
}
     68 
     69 
// Push a 64-bit memory operand: FF /6.
void X86_64Assembler::pushq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(6, address);  // /6 opcode extension.
}
     76 
     77 
// Push an immediate: 6A ib (sign-extended imm8) or 68 id (sign-extended imm32).
void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (imm.is_int8()) {
    EmitUint8(0x6A);
    EmitUint8(imm.value() & 0xFF);
  } else {
    EmitUint8(0x68);
    EmitImmediate(imm);
  }
}
     89 
     90 
// Pop into a 64-bit register: 58+rd.
void X86_64Assembler::popq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x58 + reg.LowBits());
}
     96 
     97 
// Pop into a 64-bit memory operand: 8F /0.
void X86_64Assembler::popq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(0, address);  // /0 opcode extension.
}
    104 
    105 
// Load an immediate into a 64-bit register. Uses the short REX.W C7 /0 id
// form (sign-extending imm32) when possible, else the full REX.W B8+rd io
// form with a 64-bit immediate.
void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (imm.is_int32()) {
    // 32 bit. Note: sign-extends.
    EmitRex64(dst);
    EmitUint8(0xC7);
    EmitRegisterOperand(0, dst.LowBits());
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitRex64(dst);
    EmitUint8(0xB8 + dst.LowBits());
    EmitInt64(imm.value());
  }
}
    120 
    121 
// Load a 32-bit immediate into a register: B8+rd id (upper 32 bits zeroed).
void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}
    129 
    130 
// Store a sign-extended imm32 to 64-bit memory: REX.W C7 /0 id.
void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  CHECK(imm.is_int32());  // Only a sign-extended imm32 is encodable.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);  // /0 opcode extension.
  EmitImmediate(imm);
}
    139 
    140 
// Register-to-register 64-bit move using the MR form: REX.W 89 /r.
void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
    148 
    149 
// Register-to-register 32-bit move using the RM form: 8B /r.
void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
    156 
    157 
// 64-bit load: REX.W 8B /r.
void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
    164 
    165 
// 32-bit load: 8B /r (zero-extends into the full 64-bit register).
void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
    172 
    173 
// 64-bit store: REX.W 89 /r.
void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
    180 
    181 
// 32-bit store: 89 /r.
void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
    188 
// Store a 32-bit immediate to memory: C7 /0 id.
void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);  // /0 opcode extension.
  EmitImmediate(imm);
}
    196 
// Non-temporal 32-bit store (MOVNTI): 0F C3 /r.
void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
    204 
// Non-temporal 64-bit store (MOVNTI with REX.W): REX.W 0F C3 /r.
void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
    212 
// Convenience overload: 64-bit conditional move.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
  cmov(c, dst, src, true);
}
    216 
// Conditional move reg<-reg: 0F 40+cc /r, with REX.W when is64bit.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // Condition code is folded into the opcode byte.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
    224 
    225 
// Conditional move reg<-mem: 0F 40+cc /r, with REX.W when is64bit.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (is64bit) {
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // Condition code is folded into the opcode byte.
  EmitOperand(dst.LowBits(), src);
}
    237 
    238 
// Zero-extend byte reg to 32-bit: 0F B6 /r. A REX prefix may be required to
// address the low byte of RSP/RBP/RSI/RDI, hence the byte-reg normalization.
void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
    246 
    247 
// Zero-extend byte memory operand to 32-bit: 0F B6 /r.
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}
    257 
    258 
// Sign-extend byte reg to 32-bit: 0F BE /r (byte-reg REX normalization as above).
void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
    266 
    267 
// Sign-extend byte memory operand to 32-bit: 0F BE /r.
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}
    277 
    278 
// Deliberately unimplemented: byte loads must specify an extension mode.
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
    282 
    283 
// Byte store: 88 /r (byte-reg REX normalization for spl/bpl/sil/dil).
void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}
    290 
    291 
    292 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
    293   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    294   EmitOptionalRex32(dst);
    295   EmitUint8(0xC6);
    296   EmitOperand(Register::RAX, dst);
    297   CHECK(imm.is_int8());
    298   EmitUint8(imm.value() & 0xFF);
    299 }
    300 
    301 
// Zero-extend word reg to 32-bit: 0F B7 /r.
void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
    309 
    310 
// Zero-extend word memory operand to 32-bit: 0F B7 /r.
void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}
    318 
    319 
// Sign-extend word reg to 32-bit: 0F BF /r.
void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
    327 
    328 
// Sign-extend word memory operand to 32-bit: 0F BF /r.
void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}
    336 
    337 
// Deliberately unimplemented: word loads must specify an extension mode.
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
    341 
    342 
// 16-bit store: 66 (operand-size override) + 89 /r.
void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
    350 
    351 
    352 void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
    353   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    354   EmitOperandSizeOverride();
    355   EmitOptionalRex32(dst);
    356   EmitUint8(0xC7);
    357   EmitOperand(Register::RAX, dst);
    358   CHECK(imm.is_uint16() || imm.is_int16());
    359   EmitUint8(imm.value() & 0xFF);
    360   EmitUint8(imm.value() >> 8);
    361 }
    362 
    363 
// 64-bit load-effective-address: REX.W 8D /r.
void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
    370 
    371 
// 32-bit load-effective-address: 8D /r.
void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
    378 
    379 
// Aligned packed-single move between XMM registers: 0F 28 /r.
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    387 
    388 
// Scalar single load: F3 0F 10 /r. The F3 prefix must precede any REX byte.
void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
    397 
    398 
// Scalar single store: F3 0F 11 /r.
void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
    407 
    408 
// Scalar single reg-to-reg move: F3 0F 11 /r, store (MR) form.
void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
    417 
    418 
// Sign-extend 32-bit reg into 64-bit reg: REX.W 63 /r (MOVSXD).
void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
    425 
    426 
// Sign-extend 32-bit memory operand into 64-bit reg: REX.W 63 /r.
void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}
    433 
    434 
// Convenience overload: 64-bit GPR -> XMM transfer.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  movd(dst, src, true);
}
    438 
// Convenience overload: XMM -> 64-bit GPR transfer.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  movd(dst, src, true);
}
    442 
// GPR -> XMM: 66 (REX.W) 0F 6E /r. REX.W selects the 64-bit (movq) form.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x6E);
  EmitOperand(dst.LowBits(), Operand(src));
}
    451 
// XMM -> GPR: 66 (REX.W) 0F 7E /r. REX.W selects the 64-bit (movq) form.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x7E);
  EmitOperand(src.LowBits(), Operand(dst));
}
    460 
    461 
// Scalar single add: F3 0F 58 /r.
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    470 
    471 
// Scalar single add from memory: F3 0F 58 /r.
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
    480 
    481 
// Scalar single subtract: F3 0F 5C /r.
void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    490 
    491 
// Scalar single subtract from memory: F3 0F 5C /r.
void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
    500 
    501 
// Scalar single multiply: F3 0F 59 /r.
void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    510 
    511 
// Scalar single multiply from memory: F3 0F 59 /r.
void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
    520 
    521 
// Scalar single divide: F3 0F 5E /r.
void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    530 
    531 
// Scalar single divide from memory: F3 0F 5E /r.
void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
    540 
    541 
// x87: load single-precision onto the FP stack: D9 /0.
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);  // /0 opcode extension.
}
    547 
    548 
// x87: store ST(0) as single-precision without popping: D9 /2.
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);  // /2 opcode extension.
}
    554 
    555 
// x87: store ST(0) as single-precision and pop: D9 /3.
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);  // /3 opcode extension.
}
    561 
    562 
// Scalar double load: F2 0F 10 /r. The F2 prefix must precede any REX byte.
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
    571 
    572 
// Scalar double store: F2 0F 11 /r.
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
    581 
    582 
// Scalar double reg-to-reg move: F2 0F 11 /r, store (MR) form.
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
    591 
    592 
// Scalar double add: F2 0F 58 /r.
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    601 
    602 
// Scalar double add from memory: F2 0F 58 /r.
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
    611 
    612 
// Scalar double subtract: F2 0F 5C /r.
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    621 
    622 
// Scalar double subtract from memory: F2 0F 5C /r.
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
    631 
    632 
// Scalar double multiply: F2 0F 59 /r.
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    641 
    642 
// Scalar double multiply from memory: F2 0F 59 /r.
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
    651 
    652 
// Scalar double divide: F2 0F 5E /r.
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    661 
    662 
// Scalar double divide from memory: F2 0F 5E /r.
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
    671 
    672 
// Convenience overload: convert a 32-bit integer to scalar single.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, false);
}
    676 
    677 
// Convert integer reg to scalar single: F3 (REX.W) 0F 2A /r.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
    691 
    692 
// Convert integer memory operand to scalar single: F3 (REX.W) 0F 2A /r.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
    706 
    707 
// Convenience overload: convert a 32-bit integer to scalar double.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, false);
}
    711 
    712 
// Convert integer reg to scalar double: F2 (REX.W) 0F 2A /r.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
    726 
    727 
// Convert integer memory operand to scalar double: F2 (REX.W) 0F 2A /r.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
    741 
    742 
// Convert scalar single to 32-bit integer (rounded): F3 0F 2D /r.
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    751 
    752 
// Convert scalar single to scalar double: F3 0F 5A /r.
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    761 
    762 
// Convert scalar single memory operand to scalar double: F3 0F 5A /r.
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
    771 
    772 
// Convert scalar double to 32-bit integer (rounded): F2 0F 2D /r.
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    781 
    782 
// Convenience overload: truncating single -> 32-bit integer conversion.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, false);
}
    786 
    787 
// Truncating convert scalar single to integer: F3 (REX.W) 0F 2C /r.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    801 
    802 
// Convenience overload: truncating double -> 32-bit integer conversion.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, false);
}
    806 
    807 
// Truncating convert scalar double to integer: F2 (REX.W) 0F 2C /r.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    821 
    822 
// Convert scalar double to scalar single: F2 0F 5A /r.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    831 
    832 
// Convert scalar double memory operand to scalar single: F2 0F 5A /r.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
    841 
    842 
// Convert packed dword integers to packed doubles: F3 0F E6 /r.
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    851 
    852 
// Ordered scalar single compare, sets EFLAGS: 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
    860 
    861 
// Ordered scalar single compare against memory: 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
    869 
    870 
// Ordered scalar double compare, sets EFLAGS: 66 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
    879 
    880 
// Ordered scalar double compare against memory: 66 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
    889 
    890 
// Unordered scalar single compare, sets EFLAGS: 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
    898 
    899 
// Unordered scalar single compare against memory: 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
    907 
    908 
// Unordered scalar double compare, sets EFLAGS: 66 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
    917 
    918 
// Unordered scalar double compare against memory: 66 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
    927 
    928 
// SSE4.1 round scalar double: 66 0F 3A 0B /r ib (imm8 selects rounding mode).
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate byte.
}
    939 
    940 
    941 void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
    942   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    943   EmitUint8(0x66);
    944   EmitOptionalRex32(dst, src);
    945   EmitUint8(0x0F);
    946   EmitUint8(0x3A);
    947   EmitUint8(0x0A);
    948   EmitXmmRegisterOperand(dst.LowBits(), src);
    949   EmitUint8(imm.value());
    950 }
    951 
    952 
// Scalar-double square root. Encoding: F2 [REX] 0F 51 /r.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // F2 prefix selects the double (sd) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// Scalar-single square root. Encoding: F3 [REX] 0F 51 /r.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // F3 prefix selects the single (ss) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
    971 
    972 
// Packed-double XOR with memory. Encoding: 66 [REX] 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // 66 prefix = pd form; absent = ps form (same opcode).
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}


// Packed-double XOR, register form.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// Packed-single XOR with memory. Encoding: [REX] 0F 57 /r.
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}


// Packed-single XOR, register form.
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// Packed-double AND with memory. Encoding: 66 [REX] 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitOperand(dst.LowBits(), src);
}

// Packed-double AND, register form.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// Packed-single AND. Encoding: [REX] 0F 54 /r.
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// Packed-double OR. Encoding: 66 [REX] 0F 56 /r.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// Packed-single OR. Encoding: [REX] 0F 56 /r.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
   1053 
// x87: load a 64-bit double from memory and push onto the FP stack (DD /0).
void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}


// x87: store ST(0) as a 64-bit double to memory, without popping (DD /2).
void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}


// x87: store ST(0) as a 64-bit double to memory and pop (DD /3).
void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}


// x87: store the FPU status word into AX. Emits FWAIT (9B) followed by
// FNSTSW AX (DF E0), i.e. the waiting form FSTSW AX.
void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}


// x87: store the FPU control word to memory, no-wait form (D9 /7).
void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}


// x87: load the FPU control word from memory (D9 /5).
void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}


// x87: store ST(0) as a 64-bit integer to memory and pop (DF /7).
void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}


// x87: store ST(0) as a 32-bit integer to memory and pop (DB /3).
void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}


// x87: load a 64-bit integer from memory and push onto the FP stack (DF /5).
void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}


// x87: load a 32-bit integer from memory and push onto the FP stack (DB /0).
void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}


// x87: increment the FP stack-top pointer (D9 F7).
void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}


// x87: mark FP stack register ST(index) as empty (DD C0+i).
// NOTE(review): the bound rejects index 7, but FFREE accepts ST(0)..ST(7) —
// confirm whether this should be CHECK_LT(index.value(), 8).
void X86_64Assembler::ffree(const Immediate& index) {
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 + index.value());
}


// x87: ST(0) = sin(ST(0)) (D9 FE).
void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}


// x87: ST(0) = cos(ST(0)) (D9 FF).
void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}


// x87: partial tangent of ST(0), then pushes 1.0 (D9 F2).
void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}

// x87: unordered compare ST(0) with ST(1), then pop both (DA E9).
void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}


// x87: partial remainder of ST(0) / ST(1) (D9 F8).
void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}
   1172 
   1173 
// 32-bit register/register exchange.
// NOTE(review): when BOTH operands are RAX this path emits the bare byte
// 0x90, which the CPU executes as NOP. A true 32-bit xchg (87 C0) would
// zero-extend into the upper 32 bits of RAX, so the two differ — confirm
// that no caller relies on the zero-extension for the (RAX, RAX) case.
void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is a short version for rax.
  // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
  // work.
  const bool src_rax = src.AsRegister() == RAX;
  const bool dst_rax = dst.AsRegister() == RAX;
  if (src_rax || dst_rax) {
    // Short form 0x90+reg exchanges reg with rAX.
    EmitOptionalRex32(src_rax ? dst : src);
    EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
    return;
  }

  // General case.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x87);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// 64-bit register/register exchange.
void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is a short version for rax.
  // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
  // work.
  const bool src_rax = src.AsRegister() == RAX;
  const bool dst_rax = dst.AsRegister() == RAX;
  if (src_rax || dst_rax) {
    // If src == target, emit a nop instead.
    // (Exchanging RAX with itself has no architectural effect in 64-bit mode,
    // so a plain NOP is equivalent here.)
    if (src_rax && dst_rax) {
      EmitUint8(0x90);
    } else {
      EmitRex64(src_rax ? dst : src);
      EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
    }
    return;
  }

  // General case.
  EmitRex64(src, dst);
  EmitUint8(0x87);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// 32-bit exchange of a register with memory (0x87 /r; implicitly locked).
void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
   1225 
   1226 
   1227 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   1228   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1229   CHECK(imm.is_int32());
   1230   EmitOperandSizeOverride();
   1231   EmitOptionalRex32(address);
   1232   EmitComplex(7, address, imm);
   1233 }
   1234 
   1235 
// 32-bit compare: reg - imm (group-1 opcode extension /7).
void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}


// 32-bit compare: reg0 - reg1 (0x3B /r, destination in the reg field).
void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// 32-bit compare: reg - [address].
void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// 32-bit compare: [address] - reg (0x39, operand order swapped vs. 0x3B).
void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}


// 32-bit compare: [address] - imm.
void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}


// 64-bit compare: reg0 - reg1 (REX.W 3B /r).
void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// 64-bit compare: reg - imm (immediate is sign-extended from 32 bits).
void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}


// 64-bit compare: reg - [address].
void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// 64-bit compare: [address] - imm (immediate sign-extended from 32 bits).
void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}
   1306 
   1307 
// 32-bit add: dst += src (0x03 /r, destination in the reg field).
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// 32-bit add: reg += [address].
void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}
   1322 
   1323 
// 32-bit test (AND, flags only): reg1 & reg2 (0x85 /r).
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// 32-bit test: reg & [address].
void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


// 32-bit test against an immediate, with two short-form optimizations.
void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
  // we only test the byte CpuRegister to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    // Registers 0..3 encode AL/CL/DL/BL without needing a REX prefix.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);  // Special short form: TEST AL, imm8.
    } else {
      EmitUint8(0xF6);  // TEST r/m8, imm8 (/0).
      EmitUint8(0xC0 + reg.AsRegister());
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);  // TEST EAX, imm32.
    EmitImmediate(immediate);
  } else {
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);  // TEST r/m32, imm32 (/0).
    EmitOperand(0, Operand(reg));
    EmitImmediate(immediate);
  }
}


// 64-bit test: reg1 & reg2 (REX.W 85 /r).
void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// 64-bit test: reg & [address].
void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}
   1380 
   1381 
// 32-bit AND: dst &= src (0x23 /r).
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 32-bit AND: reg &= [address].
void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}


// 32-bit AND with immediate (group-1 /4).
void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);
}


// 64-bit AND with immediate (sign-extended from 32 bits).
void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}


// 64-bit AND: dst &= src (REX.W 23 /r).
void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 64-bit AND: dst &= [src].
void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}
   1427 
   1428 
// 32-bit OR: dst |= src (0x0B /r).
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 32-bit OR: reg |= [address].
void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}


// 32-bit OR with immediate (group-1 /1).
void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);
}


// 64-bit OR with immediate (sign-extended from 32 bits).
void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}


// 64-bit OR: dst |= src (REX.W 0B /r).
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 64-bit OR: dst |= [src].
void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}
   1474 
   1475 
// 32-bit XOR: dst ^= src (0x33 /r).
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 32-bit XOR: reg ^= [address].
void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}


// 32-bit XOR with immediate (group-1 /6).
void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);
}


// 64-bit XOR: dst ^= src (REX.W 33 /r).
void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 64-bit XOR with immediate (sign-extended from 32 bits).
void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}

// 64-bit XOR: dst ^= [src].
void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}
   1520 
   1521 
// Dead code: an earlier sketch of generic REX-prefix computation, kept
// disabled. NOTE(review): if ever re-enabled, rex_reg_mem() calls
// `mem->rex()` on a `const Address&` (would need `mem.rex()`), and
// rex_mem_reg() is declared but never defined — both must be fixed first.
#if 0
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = mem->rex();
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif
   1574 
// 32-bit add with immediate (group-1 /0).
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}


// 64-bit add with immediate (sign-extended from 32 bits).
void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}


// 64-bit add: dst += [address] (REX.W 03 /r).
void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}


// 64-bit add: dst += src.
void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// 32-bit add: [address] += reg (0x01 /r, register in the reg field).
void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}


// 32-bit add: [address] += imm.
void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}
   1620 
   1621 
// 32-bit subtract: dst -= src (0x2B /r).
void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 32-bit subtract with immediate (group-1 /5).
void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);
}


// 64-bit subtract with immediate (sign-extended from 32 bits).
void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);
}


// 64-bit subtract: dst -= src (REX.W 2B /r).
void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
   1651 
   1652 
   1653 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
   1654   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1655   EmitRex64(reg, address);
   1656   EmitUint8(0x2B);
   1657   EmitOperand(reg.LowBits() & 7, address);
   1658 }
   1659 
   1660 
// 32-bit subtract: reg -= [address] (0x2B /r).
void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);
  EmitOperand(reg.LowBits(), address);
}
   1667 
   1668 
// Sign-extend EAX into EDX:EAX (opcode 0x99, no REX).
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


// Sign-extend RAX into RDX:RAX (REX.W + 0x99).
void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();
  EmitUint8(0x99);
}
   1680 
   1681 
// Signed 32-bit divide of EDX:EAX by reg (0xF7 /7; 0xF8 = mod 11, reg field 7).
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


// Signed 64-bit divide of RDX:RAX by reg (REX.W F7 /7).
void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}
   1696 
   1697 
// 32-bit two-operand signed multiply: dst *= src (0F AF /r).
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(dst.LowBits(), Operand(src));
}

// 32-bit three-operand signed multiply: dst = src * imm, using the imm8
// short form (0x6B) when the immediate fits a sign-extended byte.
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}


// 32-bit multiply in place: reg = reg * imm.
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}


// 32-bit two-operand signed multiply: reg *= [address].
void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


// 64-bit two-operand signed multiply: dst *= src (REX.W 0F AF /r).
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// 64-bit multiply in place: reg = reg * imm.
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}

// 64-bit three-operand signed multiply: dst = reg * imm, preferring the
// sign-extended imm8 form (0x6B) over the imm32 form (0x69).
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}

// 64-bit two-operand signed multiply: reg *= [address].
void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


// One-operand signed multiply: EDX:EAX = EAX * reg (0xF7 /5).
void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


// One-operand signed multiply: RDX:RAX = RAX * reg (REX.W F7 /5).
void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


// One-operand signed multiply: EDX:EAX = EAX * [address].
void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);
}


// One-operand unsigned multiply: EDX:EAX = EAX * reg (0xF7 /4).
void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(4, Operand(reg));
}


// One-operand unsigned multiply: EDX:EAX = EAX * [address].
void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);
}
   1823 
   1824 
// Shift/rotate wrappers. All delegate to EmitGenericShift; the first argument
// selects 32-bit (false) vs. 64-bit (true) operand size, the second is the
// group-2 opcode extension: /0 = rol, /1 = ror, /4 = shl, /5 = shr, /7 = sar.
// The shifter-register variants shift by CL.

// shl r32, imm8.
void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


// shl r64, imm8.
void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


// shl r32, cl.
void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


// shl r64, cl.
void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


// shr r32, imm8 (logical).
void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


// shr r64, imm8 (logical).
void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


// shr r32, cl.
void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


// shr r64, cl.
void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


// sar r32, imm8 (arithmetic).
void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


// sar r32, cl.
void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


// sar r64, imm8 (arithmetic).
void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


// sar r64, cl.
void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


// rol r32, imm8.
void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


// rol r32, cl.
void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


// ror r32, imm8.
void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


// ror r32, cl.
void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


// rol r64, imm8.
void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


// rol r64, cl.
void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


// ror r64, imm8.
void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


// ror r64, cl.
void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
   1923 
   1924 
// Two's-complement negate, 32-bit: reg = -reg (F7 /3).
void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// Two's-complement negate, 64-bit (REX.W F7 /3).
void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// Bitwise NOT, 32-bit: reg = ~reg (F7 /2).  The ModRM byte (mod=11,
// reg=/2, rm=low bits) is written directly here.
void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());  // Same bytes as EmitOperand(2, Operand(reg)).
}


// Bitwise NOT, 64-bit (REX.W F7 /2).
void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));
}
   1955 
   1956 
// enter imm16, 0 (C8 iw ib): allocate a stack frame of |imm| bytes with
// nesting level 0.
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);         // imm16, little-endian.
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);                       // Nesting level.
}


// leave (C9): tear down an ENTER-style frame.
void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


// ret (C3): near return.
void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


// ret imm16 (C2 iw): near return, popping |imm| extra bytes off the stack.
void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);         // imm16, little-endian.
  EmitUint8((imm.value() >> 8) & 0xFF);
}



// nop (90): one-byte no-op, also used for code alignment padding.
void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


// int3 (CC): breakpoint trap.
void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


// hlt (F4): halt (privileged; faults in user mode).
void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
   2005 
   2006 
// Conditional jump (Jcc) to |label|.  A bound (necessarily backward)
// target uses the short rel8 form (70+cc) when the displacement fits,
// otherwise the long rel32 form (0F 80+cc).  An unbound target always
// gets the long form so Bind() can patch the 32-bit link field.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // opcode + rel8.
    static const int kLongSize = 6;   // 0F + opcode + rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind the emit point.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}


// Jcc to a NearLabel: always the short rel8 form; a bound target must be
// in rel8 range (checked).
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}


// jrcxz (E3 rel8): jump to |label| if RCX is zero; short form only.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
   2060 
   2061 
// jmp reg (FF /4): indirect jump through a register.
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());
}

// jmp m64 (FF /4): indirect jump through memory.
void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}

// Direct jump: short EB rel8 when a bound target is close enough,
// otherwise E9 rel32.  Unbound targets always use the rel32 form so the
// 32-bit link field can be patched at Bind time.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // EB + rel8.
    static const int kLongSize = 5;   // E9 + rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind the emit point.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}


// Direct jump to a NearLabel: always EB rel8; bound targets must fit.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
   2111 
   2112 
// rep movsw (66 F3 A5): copy RCX 16-bit units from [RSI] to [RDI].
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}


// Emits the LOCK prefix (F0) and returns |this| so the locked instruction
// can be chained, e.g. lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}


// cmpxchg [mem], reg32 (0F B1): compare-and-exchange against EAX.
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// cmpxchg [mem], reg64 (REX.W 0F B1): compare-and-exchange against RAX.
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// mfence (0F AE F0): full load/store memory barrier.
void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}


// Emits the GS segment-override prefix (65) and returns |this| for
// chaining, e.g. gs()->movq(...) for thread-local accesses.
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
   2160 
   2161 
   2162 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
   2163   int value = imm.value();
   2164   if (value != 0) {
   2165     if (value > 0) {
   2166       addl(reg, imm);
   2167     } else {
   2168       subl(reg, Immediate(value));
   2169     }
   2170   }
   2171 }
   2172 
   2173 
// setcc (0F 90+cc): set the low byte of |dst| to 1 if |condition| holds,
// else 0.  The destination is encoded in the ModRM.rm field.
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  EmitUint8(0xC0 + dst.LowBits());  // mod=11, rm=dst.
}
   2184 
// bswap reg32 (0F C8+r): reverse the byte order of |dst|.
void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}

// bswap reg64 (REX.W 0F C8+r).
void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}

// bsf (0F BC): bit-scan forward, 32-bit register source.
void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// bsf, 32-bit memory source.
void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// bsf, 64-bit register source (REX.W).
void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// bsf, 64-bit memory source (REX.W).
void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// bsr (0F BD): bit-scan reverse, 32-bit register source.
void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// bsr, 32-bit memory source.
void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

// bsr, 64-bit register source (REX.W).
void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// bsr, 64-bit memory source (REX.W).
void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

// popcnt (F3 0F B8): population count, 32-bit register source.  The F3 is
// a mandatory opcode prefix and must precede any REX byte.
void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// popcnt, 32-bit memory source.
void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

// popcnt, 64-bit register source (F3 REX.W 0F B8).
void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// popcnt, 64-bit memory source (F3 REX.W 0F B8).
void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
   2298 
// repne scasw (66 F2 AF): scan 16-bit units at [RDI] for AX,
// decrementing RCX, until a match is found.
void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF2);
  EmitUint8(0xAF);
}


// repe cmpsw (66 F3 A7): compare 16-bit units at [RSI] and [RDI] while
// equal, decrementing RCX.
void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// repe cmpsd (F3 A7): 32-bit unit variant.
void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// repe cmpsq (F3 REX.W A7): 64-bit unit variant.  The REX byte must sit
// immediately before the opcode, after the F3 prefix.
void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}
   2328 
   2329 
   2330 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
   2331   // TODO: Need to have a code constants table.
   2332   int64_t constant = bit_cast<int64_t, double>(value);
   2333   pushq(Immediate(High32Bits(constant)));
   2334   pushq(Immediate(Low32Bits(constant)));
   2335   movsd(dst, Address(CpuRegister(RSP), 0));
   2336   addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
   2337 }
   2338 
   2339 
   2340 void X86_64Assembler::Align(int alignment, int offset) {
   2341   CHECK(IsPowerOfTwo(alignment));
   2342   // Emit nop instruction until the real position is aligned.
   2343   while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
   2344     nop();
   2345   }
   2346 }
   2347 
   2348 
// Binds |label| to the current buffer position and walks its chain of
// linked 32-bit patch sites, rewriting each one to the final rel32
// displacement.  While linked, each 32-bit field holds the chain's
// previous link position.
void X86_64Assembler::Bind(Label* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    int next = buffer_.Load<int32_t>(position);  // Previous link in the chain.
    buffer_.Store<int32_t>(position, bound - (position + 4));  // Patch rel32.
    label->position_ = next;
  }
  label->BindTo(bound);
}


// Binds a NearLabel.  Near links only have a single byte available, so
// each patch site stores the delta back to the previous link (0 marks
// the end of the chain); see EmitLabelLink(NearLabel*).
void X86_64Assembler::Bind(NearLabel* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);  // Delta to previous link.
    int offset = bound - (position + 1);
    CHECK(IsInt<8>(offset));  // Near jumps must stay within rel8 range.
    buffer_.Store<int8_t>(position, offset);  // Patch rel8.
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
   2375 
   2376 
// Emits a pre-encoded operand (ModRM plus optional SIB/displacement
// bytes), merging |reg_or_opcode| — a register number or an opcode
// extension — into the reg field of the ModRM byte.
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // The reg field must start empty.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);
  }
}
   2394 
   2395 
   2396 void X86_64Assembler::EmitImmediate(const Immediate& imm) {
   2397   if (imm.is_int32()) {
   2398     EmitInt32(static_cast<int32_t>(imm.value()));
   2399   } else {
   2400     EmitInt64(imm.value());
   2401   }
   2402 }
   2403 
   2404 
// Emits an ALU-group instruction (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP,
// selected by |reg_or_opcode|) with an immediate operand, choosing the
// shortest encoding: 83 /ext with a sign-extended imm8, the one-byte
// A-register short form, or the general 81 /ext imm32 form.
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));  // e.g. 05 = add eax, imm32.
    EmitImmediate(immediate);
  } else {
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate);
  }
}


// Emits the rel32 field of an instruction that targets |label|: the real
// displacement when bound, otherwise a link entry patched later by Bind.
void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}
   2436 
   2437 
// Links an unbound Label: emits a 32-bit field holding the previous head
// of the label's patch chain and makes this site the new head.
void X86_64Assembler::EmitLabelLink(Label* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}


// Links an unbound NearLabel.  Only one byte is available here, so the
// byte stores the delta back to the previous link site (0 when this is
// the first link); Bind(NearLabel*) walks the chain via these deltas.
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);
  }
  label->LinkTo(position);
}
   2459 
   2460 
// Shift/rotate of a register by an immediate count.  |reg_or_opcode| is
// the opcode extension selecting the operation; |wide| selects REX.W.
// A count of 1 has a dedicated opcode (D1) with no immediate byte;
// other counts use C1 ib.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    EmitUint8(0xD1);  // Shift by one.
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);  // Shift by imm8.
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}


// Shift/rotate of a register by the count in CL (D3 /ext).  The shifter
// register must be RCX — the ISA hard-wires the count to CL.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}
   2497 
   2498 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
   2499   // REX.WRXB
   2500   // W - 64-bit operand
   2501   // R - MODRM.reg
   2502   // X - SIB.index
   2503   // B - MODRM.rm/SIB.base
   2504   uint8_t rex = force ? 0x40 : 0;
   2505   if (w) {
   2506     rex |= 0x48;  // REX.W000
   2507   }
   2508   if (r) {
   2509     rex |= 0x44;  // REX.0R00
   2510   }
   2511   if (x) {
   2512     rex |= 0x42;  // REX.00X0
   2513   }
   2514   if (b) {
   2515     rex |= 0x41;  // REX.000B
   2516   }
   2517   if (rex != 0) {
   2518     EmitUint8(rex);
   2519   }
   2520 }
   2521 
// REX helpers for 32-bit-operand instructions: a prefix is emitted only
// when an extended register (R8-R15 / XMM8-XMM15) or the memory operand
// requires one.

// B bit from a single rm-encoded register.
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}

// R bit from |dst| (ModRM.reg), B bit from |src| (ModRM.rm).
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Same, XMM/XMM form.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Same, GPR destination / XMM source.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Same, XMM destination / GPR source.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// X/B bits come pre-computed from the memory operand itself.
void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// Operand's X/B bits combined with the R bit for |dst|.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// Operand's X/B bits combined with the R bit for an XMM |dst|.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
   2568 
// REX helpers for 64-bit-operand instructions: REX.W is always emitted,
// with R/X/B bits added as the operands require.

// Bare REX.W (0x48), no extended registers involved.
void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}

// REX.W plus the B bit for a single rm-encoded register.
void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}

// REX.W plus the memory operand's X/B bits.
void X86_64Assembler::EmitRex64(const Operand& operand) {
  uint8_t rex = operand.rex();
  rex |= 0x48;  // REX.W000
  EmitUint8(rex);
}

// REX.W, R bit from |dst|, B bit from |src|.
void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

// Same, XMM destination / GPR source (e.g. cvt instructions).
void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

// Same, GPR destination / XMM source.
void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

// REX.W plus the operand's X/B bits and the R bit for |dst|.
void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}

// REX.W plus the operand's X/B bits and the R bit for an XMM |dst|.
void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}
   2610 
// REX handling for byte-register operands: registers 4-7 (SPL/BPL/SIL/
// DIL) are only addressable as byte registers when *some* REX prefix is
// present (without one, those encodings mean AH/BH/CH/DH), so a bare
// prefix is forced in that case even with no extension bits set.
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
  // For src, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = src.AsRegister() > 3;
  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Same normalization for a register/memory form, where |dst| is the byte
// register and the operand supplies its own X/B bits.
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = dst.AsRegister() > 3;
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
   2631 
// Maps a core register to its DWARF register number for CFI records.
static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
}
// Maps an XMM register to its DWARF register number for CFI records.
static dwarf::Reg DWARFReg(FloatRegister reg) {
  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
}

// Size of one stack slot (saved register / return address) on x86-64.
constexpr size_t kFramePointerSize = 8;
   2640 
// Emits the managed-code method prologue: pushes callee-save GPRs
// (recording CFI for each), allocates the remainder of the frame with a
// single subq, saves callee-save XMMs at the top of that area, stores
// the method register at [RSP], and finally writes the incoming
// argument spills into their slots above this frame.
void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                                 const std::vector<ManagedRegister>& spill_regs,
                                 const ManagedRegisterEntrySpills& entry_spills) {
  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
  cfi_.SetCurrentCFAOffset(8);  // Return address on stack.
  CHECK_ALIGNED(frame_size, kStackAlignment);
  int gpr_count = 0;
  // Push callee-save GPRs in reverse order, tracking the CFA as it moves.
  for (int i = spill_regs.size() - 1; i >= 0; --i) {
    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
    if (spill.IsCpuRegister()) {
      pushq(spill.AsCpuRegister());
      gpr_count++;
      cfi_.AdjustCFAOffset(kFramePointerSize);
      cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
    }
  }
  // return address then method on stack.
  int64_t rest_of_frame = static_cast<int64_t>(frame_size)
                          - (gpr_count * kFramePointerSize)
                          - kFramePointerSize /*return address*/;
  subq(CpuRegister(RSP), Immediate(rest_of_frame));
  cfi_.AdjustCFAOffset(rest_of_frame);

  // spill xmms
  // XMM saves are laid out downward from the top of the just-allocated area.
  int64_t offset = rest_of_frame;
  for (int i = spill_regs.size() - 1; i >= 0; --i) {
    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
    if (spill.IsXmmRegister()) {
      offset -= sizeof(double);
      movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
      cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
    }
  }

  DCHECK_EQ(kX86_64PointerSize, kFramePointerSize);

  // The method pointer occupies the lowest slot of the new frame.
  movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());

  // Write the incoming register arguments to their spill slots, which
  // live above this frame (offsets are relative to frame_size).
  for (size_t i = 0; i < entry_spills.size(); ++i) {
    ManagedRegisterSpill spill = entry_spills.at(i);
    if (spill.AsX86_64().IsCpuRegister()) {
      if (spill.getSize() == 8) {
        movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
             spill.AsX86_64().AsCpuRegister());
      } else {
        CHECK_EQ(spill.getSize(), 4);
        movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsCpuRegister());
      }
    } else {
      if (spill.getSize() == 8) {
        movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
      } else {
        CHECK_EQ(spill.getSize(), 4);
        movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
      }
    }
  }
}
   2699 
// Emits the managed-code epilogue: restores callee-save XMMs, releases
// the non-GPR part of the frame, pops the callee-save GPRs and returns,
// keeping CFI in sync so unwind info stays valid past the return site.
void X86_64Assembler::RemoveFrame(size_t frame_size,
                            const std::vector<ManagedRegister>& spill_regs) {
  CHECK_ALIGNED(frame_size, kStackAlignment);
  cfi_.RememberState();
  int gpr_count = 0;
  // unspill xmms
  // NOTE(review): the start offset presumably accounts for two extra
  // slots (method and return address); verify against BuildFrame's
  // layout computation, which is phrased in terms of gpr_count.
  int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
  for (size_t i = 0; i < spill_regs.size(); ++i) {
    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
    if (spill.IsXmmRegister()) {
      offset += sizeof(double);
      movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
      cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
    } else {
      gpr_count++;
    }
  }
  // Release everything below the saved GPRs (method slot included).
  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
  addq(CpuRegister(RSP), Immediate(adjust));
  cfi_.AdjustCFAOffset(-adjust);
  // Pop callee-save GPRs in the reverse of BuildFrame's push order.
  for (size_t i = 0; i < spill_regs.size(); ++i) {
    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
    if (spill.IsCpuRegister()) {
      popq(spill.AsCpuRegister());
      cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
      cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
    }
  }
  ret();
  // The CFI should be restored for any code that follows the exit block.
  cfi_.RestoreState();
  cfi_.DefCFAOffset(frame_size);
}
   2733 
// Grows the current frame by |adjust| bytes, emitted as an addq of the
// negated amount (equivalent to subq), and updates the CFA offset.
void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
  CHECK_ALIGNED(adjust, kStackAlignment);
  addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
  cfi_.AdjustCFAOffset(adjust);
}

// Shrinks the current frame by |adjust| bytes and updates the CFA offset.
void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
  CHECK_ALIGNED(adjust, kStackAlignment);
  addq(CpuRegister(RSP), Immediate(adjust));
  cfi_.AdjustCFAOffset(-adjust);
}
   2745 
// Stores |msrc| into the stack slot at |offs|, dispatching on the kind of
// managed register: GPR (movl/movq by size), x87 (fstps/fstpl) or XMM
// (movss/movsd).  A no-register source must carry size 0 and emits
// nothing.
void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
  X86_64ManagedRegister src = msrc.AsX86_64();
  if (src.IsNoRegister()) {
    CHECK_EQ(0u, size);
  } else if (src.IsCpuRegister()) {
    if (size == 4) {
      CHECK_EQ(4u, size);
      movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
    } else {
      CHECK_EQ(8u, size);
      movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
    }
  } else if (src.IsRegisterPair()) {
    // NOTE(review): register pairs look like a 32-bit leftover — the size
    // check is 0 yet two movq's at offs/offs+4 are emitted; presumably
    // this branch is unreachable on x86-64.  TODO: confirm.
    CHECK_EQ(0u, size);
    movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow());
    movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)),
         src.AsRegisterPairHigh());
  } else if (src.IsX87Register()) {
    // Pop the x87 stack top into memory, 32- or 64-bit.
    if (size == 4) {
      fstps(Address(CpuRegister(RSP), offs));
    } else {
      fstpl(Address(CpuRegister(RSP), offs));
    }
  } else {
    CHECK(src.IsXmmRegister());
    if (size == 4) {
      movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
    } else {
      movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
    }
  }
}
   2778 
   2779 void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
   2780   X86_64ManagedRegister src = msrc.AsX86_64();
   2781   CHECK(src.IsCpuRegister());
   2782   movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
   2783 }
   2784 
   2785 void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
   2786   X86_64ManagedRegister src = msrc.AsX86_64();
   2787   CHECK(src.IsCpuRegister());
   2788   movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
   2789 }
   2790 
// Writes the 32-bit immediate |imm| to stack slot |dest|; the scratch
// register parameter is unused on x86-64.
void X86_64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
                                            ManagedRegister) {
  movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
}
   2795 
// Writes the 32-bit immediate |imm| to the GS-relative thread slot |dest|;
// the scratch register parameter is unused on x86-64.
void X86_64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
                                               ManagedRegister) {
  gs()->movl(Address::Absolute(dest, true), Immediate(imm));  // TODO(64) movq?
}
   2800 
   2801 void X86_64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
   2802                                                  FrameOffset fr_offs,
   2803                                                  ManagedRegister mscratch) {
   2804   X86_64ManagedRegister scratch = mscratch.AsX86_64();
   2805   CHECK(scratch.IsCpuRegister());
   2806   leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
   2807   gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
   2808 }
   2809 
// Stores the current RSP to the GS-relative thread slot |thr_offs|.
void X86_64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) {
  gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
}
   2813 
// Not supported on x86-64; aborts at runtime if reached.
void X86_64Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/,
                                 FrameOffset /*in_off*/, ManagedRegister /*scratch*/) {
  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
}
   2818 
   2819 void X86_64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
   2820   X86_64ManagedRegister dest = mdest.AsX86_64();
   2821   if (dest.IsNoRegister()) {
   2822     CHECK_EQ(0u, size);
   2823   } else if (dest.IsCpuRegister()) {
   2824     if (size == 4) {
   2825       CHECK_EQ(4u, size);
   2826       movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
   2827     } else {
   2828       CHECK_EQ(8u, size);
   2829       movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
   2830     }
   2831   } else if (dest.IsRegisterPair()) {
   2832     CHECK_EQ(0u, size);
   2833     movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src));
   2834     movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4)));
   2835   } else if (dest.IsX87Register()) {
   2836     if (size == 4) {
   2837       flds(Address(CpuRegister(RSP), src));
   2838     } else {
   2839       fldl(Address(CpuRegister(RSP), src));
   2840     }
   2841   } else {
   2842     CHECK(dest.IsXmmRegister());
   2843     if (size == 4) {
   2844       movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
   2845     } else {
   2846       movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
   2847     }
   2848   }
   2849 }
   2850 
// Loads |size| bytes from the GS-relative thread slot |src| into |mdest|.
void X86_64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) {
  X86_64ManagedRegister dest = mdest.AsX86_64();
  if (dest.IsNoRegister()) {
    CHECK_EQ(0u, size);  // Nothing to load.
  } else if (dest.IsCpuRegister()) {
    CHECK_EQ(4u, size);  // GPR path only handles 4-byte thread loads here.
    gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
  } else if (dest.IsRegisterPair()) {
    // NOTE(review): an 8-byte load into the pair's low register only -- the
    // high half is never written; confirm register pairs occur on x86-64.
    CHECK_EQ(8u, size);
    gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
  } else if (dest.IsX87Register()) {
    // x87 push onto the FP stack; flds/fldl pick the 4- or 8-byte form.
    if (size == 4) {
      gs()->flds(Address::Absolute(src, true));
    } else {
      gs()->fldl(Address::Absolute(src, true));
    }
  } else {
    CHECK(dest.IsXmmRegister());
    if (size == 4) {
      gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
    } else {
      gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
    }
  }
}
   2876 
   2877 void X86_64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
   2878   X86_64ManagedRegister dest = mdest.AsX86_64();
   2879   CHECK(dest.IsCpuRegister());
   2880   movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
   2881 }
   2882 
   2883 void X86_64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
   2884                               bool unpoison_reference) {
   2885   X86_64ManagedRegister dest = mdest.AsX86_64();
   2886   CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
   2887   movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
   2888   if (unpoison_reference) {
   2889     MaybeUnpoisonHeapReference(dest.AsCpuRegister());
   2890   }
   2891 }
   2892 
   2893 void X86_64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
   2894                               Offset offs) {
   2895   X86_64ManagedRegister dest = mdest.AsX86_64();
   2896   CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
   2897   movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
   2898 }
   2899 
   2900 void X86_64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) {
   2901   X86_64ManagedRegister dest = mdest.AsX86_64();
   2902   CHECK(dest.IsCpuRegister());
   2903   gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
   2904 }
   2905 
   2906 void X86_64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
   2907   X86_64ManagedRegister reg = mreg.AsX86_64();
   2908   CHECK(size == 1 || size == 2) << size;
   2909   CHECK(reg.IsCpuRegister()) << reg;
   2910   if (size == 1) {
   2911     movsxb(reg.AsCpuRegister(), reg.AsCpuRegister());
   2912   } else {
   2913     movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
   2914   }
   2915 }
   2916 
   2917 void X86_64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
   2918   X86_64ManagedRegister reg = mreg.AsX86_64();
   2919   CHECK(size == 1 || size == 2) << size;
   2920   CHECK(reg.IsCpuRegister()) << reg;
   2921   if (size == 1) {
   2922     movzxb(reg.AsCpuRegister(), reg.AsCpuRegister());
   2923   } else {
   2924     movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
   2925   }
   2926 }
   2927 
   2928 void X86_64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
   2929   X86_64ManagedRegister dest = mdest.AsX86_64();
   2930   X86_64ManagedRegister src = msrc.AsX86_64();
   2931   if (!dest.Equals(src)) {
   2932     if (dest.IsCpuRegister() && src.IsCpuRegister()) {
   2933       movq(dest.AsCpuRegister(), src.AsCpuRegister());
   2934     } else if (src.IsX87Register() && dest.IsXmmRegister()) {
   2935       // Pass via stack and pop X87 register
   2936       subl(CpuRegister(RSP), Immediate(16));
   2937       if (size == 4) {
   2938         CHECK_EQ(src.AsX87Register(), ST0);
   2939         fstps(Address(CpuRegister(RSP), 0));
   2940         movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
   2941       } else {
   2942         CHECK_EQ(src.AsX87Register(), ST0);
   2943         fstpl(Address(CpuRegister(RSP), 0));
   2944         movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
   2945       }
   2946       addq(CpuRegister(RSP), Immediate(16));
   2947     } else {
   2948       // TODO: x87, SSE
   2949       UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
   2950     }
   2951   }
   2952 }
   2953 
   2954 void X86_64Assembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
   2955   X86_64ManagedRegister scratch = mscratch.AsX86_64();
   2956   CHECK(scratch.IsCpuRegister());
   2957   movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
   2958   movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
   2959 }
   2960 
   2961 void X86_64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
   2962                                              ThreadOffset<8> thr_offs,
   2963                                              ManagedRegister mscratch) {
   2964   X86_64ManagedRegister scratch = mscratch.AsX86_64();
   2965   CHECK(scratch.IsCpuRegister());
   2966   gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
   2967   Store(fr_offs, scratch, 8);
   2968 }
   2969 
   2970 void X86_64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs,
   2971                                            FrameOffset fr_offs,
   2972                                            ManagedRegister mscratch) {
   2973   X86_64ManagedRegister scratch = mscratch.AsX86_64();
   2974   CHECK(scratch.IsCpuRegister());
   2975   Load(scratch, fr_offs, 8);
   2976   gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
   2977 }
   2978 
   2979 void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch,
   2980                            size_t size) {
   2981   X86_64ManagedRegister scratch = mscratch.AsX86_64();
   2982   if (scratch.IsCpuRegister() && size == 8) {
   2983     Load(scratch, src, 4);
   2984     Store(dest, scratch, 4);
   2985     Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
   2986     Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
   2987   } else {
   2988     Load(scratch, src, size);
   2989     Store(dest, scratch, size);
   2990   }
   2991 }
   2992 
// Copy from a register-based object into a stack slot: not implemented on
// x86-64; aborts at runtime if reached.
void X86_64Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/,
                        ManagedRegister /*scratch*/, size_t /*size*/) {
  UNIMPLEMENTED(FATAL);
}
   2997 
// Copies a value (nominally 4 bytes) from stack slot |src| to
// [dest_base + dest_offset] via push/pop through memory, so no scratch
// register is needed (the scratch argument must be absent).
// NOTE(review): pushq/popq move 8 bytes even though size is checked as 4 --
// confirm the adjacent bytes at the destination are safe to clobber.
void X86_64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
                        ManagedRegister scratch, size_t size) {
  CHECK(scratch.IsNoRegister());
  CHECK_EQ(size, 4u);
  pushq(Address(CpuRegister(RSP), src));
  popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
}
   3005 
   3006 void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
   3007                         ManagedRegister mscratch, size_t size) {
   3008   CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
   3009   CHECK_EQ(size, 4u);
   3010   movq(scratch, Address(CpuRegister(RSP), src_base));
   3011   movq(scratch, Address(scratch, src_offset));
   3012   movq(Address(CpuRegister(RSP), dest), scratch);
   3013 }
   3014 
// Copies a value (nominally 4 bytes) from [src + src_offset] to
// [dest + dest_offset] via push/pop through memory; no scratch register is
// needed (the scratch argument must be absent).
// NOTE(review): pushq/popq move 8 bytes even though size is checked as 4 --
// confirm the adjacent bytes are safe to read/clobber.
void X86_64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
                        ManagedRegister src, Offset src_offset,
                        ManagedRegister scratch, size_t size) {
  CHECK_EQ(size, 4u);
  CHECK(scratch.IsNoRegister());
  pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
  popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
}
   3023 
   3024 void X86_64Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
   3025                         ManagedRegister mscratch, size_t size) {
   3026   CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
   3027   CHECK_EQ(size, 4u);
   3028   CHECK_EQ(dest.Int32Value(), src.Int32Value());
   3029   movq(scratch, Address(CpuRegister(RSP), src));
   3030   pushq(Address(scratch, src_offset));
   3031   popq(Address(scratch, dest_offset));
   3032 }
   3033 
// Emits a full memory fence (mfence); the scratch register is unused.
void X86_64Assembler::MemoryBarrier(ManagedRegister) {
  mfence();
}
   3037 
// Produces a handle scope entry address in |mout_reg|. When null_allowed is
// set, the output is the address of the handle scope slot if the reference
// in |min_reg| is non-null, and 0 otherwise; when not allowed, the address
// is produced unconditionally.
void X86_64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
                                   FrameOffset handle_scope_offset,
                                   ManagedRegister min_reg, bool null_allowed) {
  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
  if (in_reg.IsNoRegister()) {  // TODO(64): && null_allowed
    // Use out_reg as indicator of null.
    in_reg = out_reg;
    // TODO: movzwl
    // Reload the reference from its stack slot so it can be null-tested below.
    movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
  }
  CHECK(in_reg.IsCpuRegister());
  CHECK(out_reg.IsCpuRegister());
  VerifyObject(in_reg, null_allowed);
  if (null_allowed) {
    Label null_arg;
    // Pre-zero the output so the null case falls through holding 0.
    if (!out_reg.Equals(in_reg)) {
      xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
    }
    testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
    j(kZero, &null_arg);
    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
    Bind(&null_arg);
  } else {
    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
  }
}
   3065 
// Produces a handle scope entry address and stores it (8 bytes) to stack
// slot |out_off|, using |mscratch| as the working register. With
// null_allowed set, a null reference in the handle scope slot yields 0
// (the scratch register still holds the zero reference value from the movl).
void X86_64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
                                   FrameOffset handle_scope_offset,
                                   ManagedRegister mscratch,
                                   bool null_allowed) {
  X86_64ManagedRegister scratch = mscratch.AsX86_64();
  CHECK(scratch.IsCpuRegister());
  if (null_allowed) {
    Label null_arg;
    // Load and null-test the reference; skip the address computation if null.
    movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
    testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
    j(kZero, &null_arg);
    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
    Bind(&null_arg);
  } else {
    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
  }
  Store(out_off, scratch, 8);
}
   3084 
// Given a handle scope entry, load the associated reference.
// |min_reg| holds the entry address (or null); |mout_reg| receives the
// dereferenced value, or 0 when the entry address is null.
void X86_64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
                                         ManagedRegister min_reg) {
  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
  CHECK(out_reg.IsCpuRegister());
  CHECK(in_reg.IsCpuRegister());
  Label null_arg;
  // Pre-zero the output so the null case falls through holding 0.
  if (!out_reg.Equals(in_reg)) {
    xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
  }
  testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
  j(kZero, &null_arg);
  movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
  Bind(&null_arg);
}
   3101 
// Reference validation hook (register form): intentionally a no-op.
void X86_64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
  // TODO: not validating references
}
   3105 
// Reference validation hook (stack-slot form): intentionally a no-op.
void X86_64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
  // TODO: not validating references
}
   3109 
   3110 void X86_64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
   3111   X86_64ManagedRegister base = mbase.AsX86_64();
   3112   CHECK(base.IsCpuRegister());
   3113   call(Address(base.AsCpuRegister(), offset.Int32Value()));
   3114   // TODO: place reference map on call
   3115 }
   3116 
   3117 void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
   3118   CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
   3119   movq(scratch, Address(CpuRegister(RSP), base));
   3120   call(Address(scratch, offset));
   3121 }
   3122 
// Emits an indirect call through the GS-relative thread slot |offset|;
// the scratch register is unused on x86-64.
void X86_64Assembler::CallFromThread64(ThreadOffset<8> offset, ManagedRegister /*mscratch*/) {
  gs()->call(Address::Absolute(offset, true));
}
   3126 
// Loads Thread::Current() (from the GS-relative self slot) into |tr|.
void X86_64Assembler::GetCurrentThread(ManagedRegister tr) {
  gs()->movq(tr.AsX86_64().AsCpuRegister(), Address::Absolute(Thread::SelfOffset<8>(), true));
}
   3130 
   3131 void X86_64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
   3132   X86_64ManagedRegister scratch = mscratch.AsX86_64();
   3133   gs()->movq(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<8>(), true));
   3134   movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
   3135 }
   3136 
// Slowpath entered when Thread::Current()->_exception is non-null
class X86_64ExceptionSlowPath FINAL : public SlowPath {
 public:
  // |stack_adjust| is the frame-size delta to undo before delivering the
  // exception (see Emit).
  explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
  virtual void Emit(Assembler *sp_asm) OVERRIDE;
 private:
  const size_t stack_adjust_;
};
   3145 
// Emits a check of the thread's pending-exception slot; if non-null, jumps
// to an arena-allocated slow path that delivers the exception. The scratch
// register is unused on x86-64.
void X86_64Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
  X86_64ExceptionSlowPath* slow = new (GetArena()) X86_64ExceptionSlowPath(stack_adjust);
  buffer_.EnqueueSlowPath(slow);
  gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<8>(), true), Immediate(0));
  j(kNotEqual, slow->Entry());
}
   3152 
// Slow-path body: undo any extra frame adjustment, load the pending
// exception into the first argument register, and tail into the
// pDeliverException runtime entrypoint (which does not return).
void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
  X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm);
#define __ sp_asm->
  __ Bind(&entry_);
  // Note: the return value is dead
  if (stack_adjust_ != 0) {  // Fix up the frame.
    __ DecreaseFrameSize(stack_adjust_);
  }
  // Pass exception as argument in RDI
  __ gs()->movq(CpuRegister(RDI), Address::Absolute(Thread::ExceptionOffset<8>(), true));
  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(8, pDeliverException), true));
  // this call should never return
  __ int3();
#undef __
}
   3168 
   3169 void X86_64Assembler::AddConstantArea() {
   3170   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   3171   for (size_t i = 0, e = area.size(); i < e; i++) {
   3172     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   3173     EmitInt32(area[i]);
   3174   }
   3175 }
   3176 
   3177 size_t ConstantArea::AppendInt32(int32_t v) {
   3178   size_t result = buffer_.size() * elem_size_;
   3179   buffer_.push_back(v);
   3180   return result;
   3181 }
   3182 
   3183 size_t ConstantArea::AddInt32(int32_t v) {
   3184   // Look for an existing match.
   3185   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
   3186     if (v == buffer_[i]) {
   3187       return i * elem_size_;
   3188     }
   3189   }
   3190 
   3191   // Didn't match anything.
   3192   return AppendInt32(v);
   3193 }
   3194 
   3195 size_t ConstantArea::AddInt64(int64_t v) {
   3196   int32_t v_low = v;
   3197   int32_t v_high = v >> 32;
   3198   if (buffer_.size() > 1) {
   3199     // Ensure we don't pass the end of the buffer.
   3200     for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
   3201       if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
   3202         return i * elem_size_;
   3203       }
   3204     }
   3205   }
   3206 
   3207   // Didn't match anything.
   3208   size_t result = buffer_.size() * elem_size_;
   3209   buffer_.push_back(v_low);
   3210   buffer_.push_back(v_high);
   3211   return result;
   3212 }
   3213 
// Adds a double constant, deduplicated by its 64-bit bit pattern; returns
// its byte offset in the constant area.
size_t ConstantArea::AddDouble(double v) {
  // Treat the value as a 64-bit integer value.
  return AddInt64(bit_cast<int64_t, double>(v));
}
   3218 
// Adds a float constant, deduplicated by its 32-bit bit pattern; returns
// its byte offset in the constant area.
size_t ConstantArea::AddFloat(float v) {
  // Treat the value as a 32-bit integer value.
  return AddInt32(bit_cast<int32_t, float>(v));
}
   3223 
   3224 }  // namespace x86_64
   3225 }  // namespace art
   3226