Home | History | Annotate | Download | only in x86_64
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "assembler_x86_64.h"
     18 
     19 #include "base/casts.h"
     20 #include "entrypoints/quick/quick_entrypoints.h"
     21 #include "memory_region.h"
     22 #include "thread.h"
     23 
     24 namespace art {
     25 namespace x86_64 {
     26 
     27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
     28   return os << reg.AsRegister();
     29 }
     30 
     31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
     32   return os << reg.AsFloatRegister();
     33 }
     34 
     35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
     36   return os << "ST" << static_cast<int>(reg);
     37 }
     38 
     39 void X86_64Assembler::call(CpuRegister reg) {
     40   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     41   EmitOptionalRex32(reg);
     42   EmitUint8(0xFF);
     43   EmitRegisterOperand(2, reg.LowBits());
     44 }
     45 
     46 
     47 void X86_64Assembler::call(const Address& address) {
     48   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     49   EmitOptionalRex32(address);
     50   EmitUint8(0xFF);
     51   EmitOperand(2, address);
     52 }
     53 
     54 
     55 void X86_64Assembler::call(Label* label) {
     56   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     57   EmitUint8(0xE8);
     58   static const int kSize = 5;
     59   // Offset by one because we already have emitted the opcode.
     60   EmitLabel(label, kSize - 1);
     61 }
     62 
     63 void X86_64Assembler::pushq(CpuRegister reg) {
     64   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     65   EmitOptionalRex32(reg);
     66   EmitUint8(0x50 + reg.LowBits());
     67 }
     68 
     69 
     70 void X86_64Assembler::pushq(const Address& address) {
     71   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     72   EmitOptionalRex32(address);
     73   EmitUint8(0xFF);
     74   EmitOperand(6, address);
     75 }
     76 
     77 
     78 void X86_64Assembler::pushq(const Immediate& imm) {
     79   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     80   CHECK(imm.is_int32());  // pushq only supports 32b immediate.
     81   if (imm.is_int8()) {
     82     EmitUint8(0x6A);
     83     EmitUint8(imm.value() & 0xFF);
     84   } else {
     85     EmitUint8(0x68);
     86     EmitImmediate(imm);
     87   }
     88 }
     89 
     90 
     91 void X86_64Assembler::popq(CpuRegister reg) {
     92   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     93   EmitOptionalRex32(reg);
     94   EmitUint8(0x58 + reg.LowBits());
     95 }
     96 
     97 
     98 void X86_64Assembler::popq(const Address& address) {
     99   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    100   EmitOptionalRex32(address);
    101   EmitUint8(0x8F);
    102   EmitOperand(0, address);
    103 }
    104 
    105 
    106 void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
    107   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    108   if (imm.is_int32()) {
    109     // 32 bit. Note: sign-extends.
    110     EmitRex64(dst);
    111     EmitUint8(0xC7);
    112     EmitRegisterOperand(0, dst.LowBits());
    113     EmitInt32(static_cast<int32_t>(imm.value()));
    114   } else {
    115     EmitRex64(dst);
    116     EmitUint8(0xB8 + dst.LowBits());
    117     EmitInt64(imm.value());
    118   }
    119 }
    120 
    121 
    122 void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
    123   CHECK(imm.is_int32());
    124   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    125   EmitOptionalRex32(dst);
    126   EmitUint8(0xB8 + dst.LowBits());
    127   EmitImmediate(imm);
    128 }
    129 
    130 
    131 void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
    132   CHECK(imm.is_int32());
    133   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    134   EmitRex64(dst);
    135   EmitUint8(0xC7);
    136   EmitOperand(0, dst);
    137   EmitImmediate(imm);
    138 }
    139 
    140 
    141 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
    142   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    143   // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
    144   EmitRex64(src, dst);
    145   EmitUint8(0x89);
    146   EmitRegisterOperand(src.LowBits(), dst.LowBits());
    147 }
    148 
    149 
    150 void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
    151   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    152   EmitOptionalRex32(dst, src);
    153   EmitUint8(0x8B);
    154   EmitRegisterOperand(dst.LowBits(), src.LowBits());
    155 }
    156 
    157 
    158 void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
    159   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    160   EmitRex64(dst, src);
    161   EmitUint8(0x8B);
    162   EmitOperand(dst.LowBits(), src);
    163 }
    164 
    165 
    166 void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
    167   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    168   EmitOptionalRex32(dst, src);
    169   EmitUint8(0x8B);
    170   EmitOperand(dst.LowBits(), src);
    171 }
    172 
    173 
    174 void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
    175   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    176   EmitRex64(src, dst);
    177   EmitUint8(0x89);
    178   EmitOperand(src.LowBits(), dst);
    179 }
    180 
    181 
    182 void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
    183   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    184   EmitOptionalRex32(src, dst);
    185   EmitUint8(0x89);
    186   EmitOperand(src.LowBits(), dst);
    187 }
    188 
    189 void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
    190   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    191   EmitOptionalRex32(dst);
    192   EmitUint8(0xC7);
    193   EmitOperand(0, dst);
    194   EmitImmediate(imm);
    195 }
    196 
    197 void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
    198   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    199   EmitOptionalRex32(src, dst);
    200   EmitUint8(0x0F);
    201   EmitUint8(0xC3);
    202   EmitOperand(src.LowBits(), dst);
    203 }
    204 
    205 void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
    206   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    207   EmitRex64(src, dst);
    208   EmitUint8(0x0F);
    209   EmitUint8(0xC3);
    210   EmitOperand(src.LowBits(), dst);
    211 }
    212 
    213 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
    214   cmov(c, dst, src, true);
    215 }
    216 
    217 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
    218   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    219   EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
    220   EmitUint8(0x0F);
    221   EmitUint8(0x40 + c);
    222   EmitRegisterOperand(dst.LowBits(), src.LowBits());
    223 }
    224 
    225 
    226 void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
    227   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    228   if (is64bit) {
    229     EmitRex64(dst, src);
    230   } else {
    231     EmitOptionalRex32(dst, src);
    232   }
    233   EmitUint8(0x0F);
    234   EmitUint8(0x40 + c);
    235   EmitOperand(dst.LowBits(), src);
    236 }
    237 
    238 
    239 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
    240   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    241   EmitOptionalByteRegNormalizingRex32(dst, src);
    242   EmitUint8(0x0F);
    243   EmitUint8(0xB6);
    244   EmitRegisterOperand(dst.LowBits(), src.LowBits());
    245 }
    246 
    247 
    248 void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
    249   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    250   // Byte register is only in the source register form, so we don't use
    251   // EmitOptionalByteRegNormalizingRex32(dst, src);
    252   EmitOptionalRex32(dst, src);
    253   EmitUint8(0x0F);
    254   EmitUint8(0xB6);
    255   EmitOperand(dst.LowBits(), src);
    256 }
    257 
    258 
    259 void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
    260   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    261   EmitOptionalByteRegNormalizingRex32(dst, src);
    262   EmitUint8(0x0F);
    263   EmitUint8(0xBE);
    264   EmitRegisterOperand(dst.LowBits(), src.LowBits());
    265 }
    266 
    267 
    268 void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
    269   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    270   // Byte register is only in the source register form, so we don't use
    271   // EmitOptionalByteRegNormalizingRex32(dst, src);
    272   EmitOptionalRex32(dst, src);
    273   EmitUint8(0x0F);
    274   EmitUint8(0xBE);
    275   EmitOperand(dst.LowBits(), src);
    276 }
    277 
    278 
// Byte loads into a register are deliberately not implemented: this overload emits no
// code and only logs a FATAL message pointing callers at movzxb/movsxb.
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
    282 
    283 
    284 void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
    285   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    286   EmitOptionalByteRegNormalizingRex32(src, dst);
    287   EmitUint8(0x88);
    288   EmitOperand(src.LowBits(), dst);
    289 }
    290 
    291 
    292 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
    293   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    294   EmitOptionalRex32(dst);
    295   EmitUint8(0xC6);
    296   EmitOperand(Register::RAX, dst);
    297   CHECK(imm.is_int8());
    298   EmitUint8(imm.value() & 0xFF);
    299 }
    300 
    301 
    302 void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
    303   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    304   EmitOptionalRex32(dst, src);
    305   EmitUint8(0x0F);
    306   EmitUint8(0xB7);
    307   EmitRegisterOperand(dst.LowBits(), src.LowBits());
    308 }
    309 
    310 
    311 void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
    312   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    313   EmitOptionalRex32(dst, src);
    314   EmitUint8(0x0F);
    315   EmitUint8(0xB7);
    316   EmitOperand(dst.LowBits(), src);
    317 }
    318 
    319 
    320 void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
    321   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    322   EmitOptionalRex32(dst, src);
    323   EmitUint8(0x0F);
    324   EmitUint8(0xBF);
    325   EmitRegisterOperand(dst.LowBits(), src.LowBits());
    326 }
    327 
    328 
    329 void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
    330   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    331   EmitOptionalRex32(dst, src);
    332   EmitUint8(0x0F);
    333   EmitUint8(0xBF);
    334   EmitOperand(dst.LowBits(), src);
    335 }
    336 
    337 
// 16-bit loads into a register are deliberately not implemented: this overload emits
// no code and only logs a FATAL message pointing callers at movzxw/movsxw.
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
    341 
    342 
    343 void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
    344   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    345   EmitOperandSizeOverride();
    346   EmitOptionalRex32(src, dst);
    347   EmitUint8(0x89);
    348   EmitOperand(src.LowBits(), dst);
    349 }
    350 
    351 
    352 void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
    353   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    354   EmitOperandSizeOverride();
    355   EmitOptionalRex32(dst);
    356   EmitUint8(0xC7);
    357   EmitOperand(Register::RAX, dst);
    358   CHECK(imm.is_uint16() || imm.is_int16());
    359   EmitUint8(imm.value() & 0xFF);
    360   EmitUint8(imm.value() >> 8);
    361 }
    362 
    363 
    364 void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
    365   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    366   EmitRex64(dst, src);
    367   EmitUint8(0x8D);
    368   EmitOperand(dst.LowBits(), src);
    369 }
    370 
    371 
    372 void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
    373   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    374   EmitOptionalRex32(dst, src);
    375   EmitUint8(0x8D);
    376   EmitOperand(dst.LowBits(), src);
    377 }
    378 
    379 
    380 void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
    381   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    382   EmitOptionalRex32(dst, src);
    383   EmitUint8(0x0F);
    384   EmitUint8(0x28);
    385   EmitXmmRegisterOperand(dst.LowBits(), src);
    386 }
    387 
    388 
    389 void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
    390   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    391   EmitOptionalRex32(dst, src);
    392   EmitUint8(0x0F);
    393   EmitUint8(0x28);
    394   EmitOperand(dst.LowBits(), src);
    395 }
    396 
    397 
    398 void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
    399   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    400   EmitOptionalRex32(dst, src);
    401   EmitUint8(0x0F);
    402   EmitUint8(0x10);
    403   EmitOperand(dst.LowBits(), src);
    404 }
    405 
    406 
    407 void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
    408   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    409   EmitOptionalRex32(src, dst);
    410   EmitUint8(0x0F);
    411   EmitUint8(0x29);
    412   EmitOperand(src.LowBits(), dst);
    413 }
    414 
    415 
    416 void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
    417   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    418   EmitOptionalRex32(src, dst);
    419   EmitUint8(0x0F);
    420   EmitUint8(0x11);
    421   EmitOperand(src.LowBits(), dst);
    422 }
    423 
    424 
    425 void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
    426   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    427   EmitUint8(0xF3);
    428   EmitOptionalRex32(dst, src);
    429   EmitUint8(0x0F);
    430   EmitUint8(0x10);
    431   EmitOperand(dst.LowBits(), src);
    432 }
    433 
    434 
    435 void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
    436   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    437   EmitUint8(0xF3);
    438   EmitOptionalRex32(src, dst);
    439   EmitUint8(0x0F);
    440   EmitUint8(0x11);
    441   EmitOperand(src.LowBits(), dst);
    442 }
    443 
    444 
    445 void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
    446   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    447   EmitUint8(0xF3);
    448   EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
    449   EmitUint8(0x0F);
    450   EmitUint8(0x11);
    451   EmitXmmRegisterOperand(src.LowBits(), dst);
    452 }
    453 
    454 
    455 void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
    456   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    457   EmitRex64(dst, src);
    458   EmitUint8(0x63);
    459   EmitRegisterOperand(dst.LowBits(), src.LowBits());
    460 }
    461 
    462 
    463 void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
    464   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    465   EmitRex64(dst, src);
    466   EmitUint8(0x63);
    467   EmitOperand(dst.LowBits(), src);
    468 }
    469 
    470 
    471 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
    472   movd(dst, src, true);
    473 }
    474 
    475 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
    476   movd(dst, src, true);
    477 }
    478 
    479 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
    480   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    481   EmitUint8(0x66);
    482   EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
    483   EmitUint8(0x0F);
    484   EmitUint8(0x6E);
    485   EmitOperand(dst.LowBits(), Operand(src));
    486 }
    487 
    488 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
    489   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    490   EmitUint8(0x66);
    491   EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
    492   EmitUint8(0x0F);
    493   EmitUint8(0x7E);
    494   EmitOperand(src.LowBits(), Operand(dst));
    495 }
    496 
    497 
    498 void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
    499   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    500   EmitUint8(0xF3);
    501   EmitOptionalRex32(dst, src);
    502   EmitUint8(0x0F);
    503   EmitUint8(0x58);
    504   EmitXmmRegisterOperand(dst.LowBits(), src);
    505 }
    506 
    507 
    508 void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
    509   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    510   EmitUint8(0xF3);
    511   EmitOptionalRex32(dst, src);
    512   EmitUint8(0x0F);
    513   EmitUint8(0x58);
    514   EmitOperand(dst.LowBits(), src);
    515 }
    516 
    517 
    518 void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
    519   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    520   EmitUint8(0xF3);
    521   EmitOptionalRex32(dst, src);
    522   EmitUint8(0x0F);
    523   EmitUint8(0x5C);
    524   EmitXmmRegisterOperand(dst.LowBits(), src);
    525 }
    526 
    527 
    528 void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
    529   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    530   EmitUint8(0xF3);
    531   EmitOptionalRex32(dst, src);
    532   EmitUint8(0x0F);
    533   EmitUint8(0x5C);
    534   EmitOperand(dst.LowBits(), src);
    535 }
    536 
    537 
    538 void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
    539   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    540   EmitUint8(0xF3);
    541   EmitOptionalRex32(dst, src);
    542   EmitUint8(0x0F);
    543   EmitUint8(0x59);
    544   EmitXmmRegisterOperand(dst.LowBits(), src);
    545 }
    546 
    547 
    548 void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
    549   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    550   EmitUint8(0xF3);
    551   EmitOptionalRex32(dst, src);
    552   EmitUint8(0x0F);
    553   EmitUint8(0x59);
    554   EmitOperand(dst.LowBits(), src);
    555 }
    556 
    557 
    558 void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
    559   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    560   EmitUint8(0xF3);
    561   EmitOptionalRex32(dst, src);
    562   EmitUint8(0x0F);
    563   EmitUint8(0x5E);
    564   EmitXmmRegisterOperand(dst.LowBits(), src);
    565 }
    566 
    567 
    568 void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
    569   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    570   EmitUint8(0xF3);
    571   EmitOptionalRex32(dst, src);
    572   EmitUint8(0x0F);
    573   EmitUint8(0x5E);
    574   EmitOperand(dst.LowBits(), src);
    575 }
    576 
    577 
    578 void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
    579   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    580   EmitOptionalRex32(dst, src);
    581   EmitUint8(0x0F);
    582   EmitUint8(0x58);
    583   EmitXmmRegisterOperand(dst.LowBits(), src);
    584 }
    585 
    586 
    587 void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
    588   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    589   EmitOptionalRex32(dst, src);
    590   EmitUint8(0x0F);
    591   EmitUint8(0x5C);
    592   EmitXmmRegisterOperand(dst.LowBits(), src);
    593 }
    594 
    595 
    596 void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
    597   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    598   EmitOptionalRex32(dst, src);
    599   EmitUint8(0x0F);
    600   EmitUint8(0x59);
    601   EmitXmmRegisterOperand(dst.LowBits(), src);
    602 }
    603 
    604 
    605 void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
    606   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    607   EmitOptionalRex32(dst, src);
    608   EmitUint8(0x0F);
    609   EmitUint8(0x5E);
    610   EmitXmmRegisterOperand(dst.LowBits(), src);
    611 }
    612 
    613 
    614 void X86_64Assembler::flds(const Address& src) {
    615   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    616   EmitUint8(0xD9);
    617   EmitOperand(0, src);
    618 }
    619 
    620 
    621 void X86_64Assembler::fsts(const Address& dst) {
    622   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    623   EmitUint8(0xD9);
    624   EmitOperand(2, dst);
    625 }
    626 
    627 
    628 void X86_64Assembler::fstps(const Address& dst) {
    629   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    630   EmitUint8(0xD9);
    631   EmitOperand(3, dst);
    632 }
    633 
    634 
    635 void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
    636   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    637   EmitUint8(0x66);
    638   EmitOptionalRex32(dst, src);
    639   EmitUint8(0x0F);
    640   EmitUint8(0x28);
    641   EmitXmmRegisterOperand(dst.LowBits(), src);
    642 }
    643 
    644 
    645 void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
    646   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    647   EmitUint8(0x66);
    648   EmitOptionalRex32(dst, src);
    649   EmitUint8(0x0F);
    650   EmitUint8(0x28);
    651   EmitOperand(dst.LowBits(), src);
    652 }
    653 
    654 
    655 void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
    656   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    657   EmitUint8(0x66);
    658   EmitOptionalRex32(dst, src);
    659   EmitUint8(0x0F);
    660   EmitUint8(0x10);
    661   EmitOperand(dst.LowBits(), src);
    662 }
    663 
    664 
    665 void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
    666   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    667   EmitUint8(0x66);
    668   EmitOptionalRex32(src, dst);
    669   EmitUint8(0x0F);
    670   EmitUint8(0x29);
    671   EmitOperand(src.LowBits(), dst);
    672 }
    673 
    674 
    675 void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
    676   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    677   EmitUint8(0x66);
    678   EmitOptionalRex32(src, dst);
    679   EmitUint8(0x0F);
    680   EmitUint8(0x11);
    681   EmitOperand(src.LowBits(), dst);
    682 }
    683 
    684 
    685 void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
    686   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    687   EmitUint8(0xF2);
    688   EmitOptionalRex32(dst, src);
    689   EmitUint8(0x0F);
    690   EmitUint8(0x10);
    691   EmitOperand(dst.LowBits(), src);
    692 }
    693 
    694 
    695 void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
    696   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    697   EmitUint8(0xF2);
    698   EmitOptionalRex32(src, dst);
    699   EmitUint8(0x0F);
    700   EmitUint8(0x11);
    701   EmitOperand(src.LowBits(), dst);
    702 }
    703 
    704 
    705 void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
    706   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    707   EmitUint8(0xF2);
    708   EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
    709   EmitUint8(0x0F);
    710   EmitUint8(0x11);
    711   EmitXmmRegisterOperand(src.LowBits(), dst);
    712 }
    713 
    714 
    715 void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
    716   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    717   EmitUint8(0xF2);
    718   EmitOptionalRex32(dst, src);
    719   EmitUint8(0x0F);
    720   EmitUint8(0x58);
    721   EmitXmmRegisterOperand(dst.LowBits(), src);
    722 }
    723 
    724 
    725 void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
    726   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    727   EmitUint8(0xF2);
    728   EmitOptionalRex32(dst, src);
    729   EmitUint8(0x0F);
    730   EmitUint8(0x58);
    731   EmitOperand(dst.LowBits(), src);
    732 }
    733 
    734 
    735 void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
    736   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    737   EmitUint8(0xF2);
    738   EmitOptionalRex32(dst, src);
    739   EmitUint8(0x0F);
    740   EmitUint8(0x5C);
    741   EmitXmmRegisterOperand(dst.LowBits(), src);
    742 }
    743 
    744 
    745 void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
    746   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    747   EmitUint8(0xF2);
    748   EmitOptionalRex32(dst, src);
    749   EmitUint8(0x0F);
    750   EmitUint8(0x5C);
    751   EmitOperand(dst.LowBits(), src);
    752 }
    753 
    754 
    755 void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
    756   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    757   EmitUint8(0xF2);
    758   EmitOptionalRex32(dst, src);
    759   EmitUint8(0x0F);
    760   EmitUint8(0x59);
    761   EmitXmmRegisterOperand(dst.LowBits(), src);
    762 }
    763 
    764 
    765 void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
    766   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    767   EmitUint8(0xF2);
    768   EmitOptionalRex32(dst, src);
    769   EmitUint8(0x0F);
    770   EmitUint8(0x59);
    771   EmitOperand(dst.LowBits(), src);
    772 }
    773 
    774 
    775 void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
    776   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    777   EmitUint8(0xF2);
    778   EmitOptionalRex32(dst, src);
    779   EmitUint8(0x0F);
    780   EmitUint8(0x5E);
    781   EmitXmmRegisterOperand(dst.LowBits(), src);
    782 }
    783 
    784 
    785 void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
    786   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    787   EmitUint8(0xF2);
    788   EmitOptionalRex32(dst, src);
    789   EmitUint8(0x0F);
    790   EmitUint8(0x5E);
    791   EmitOperand(dst.LowBits(), src);
    792 }
    793 
    794 
    795 void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
    796   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    797   EmitUint8(0x66);
    798   EmitOptionalRex32(dst, src);
    799   EmitUint8(0x0F);
    800   EmitUint8(0x58);
    801   EmitXmmRegisterOperand(dst.LowBits(), src);
    802 }
    803 
    804 
    805 void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
    806   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    807   EmitUint8(0x66);
    808   EmitOptionalRex32(dst, src);
    809   EmitUint8(0x0F);
    810   EmitUint8(0x5C);
    811   EmitXmmRegisterOperand(dst.LowBits(), src);
    812 }
    813 
    814 
    815 void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
    816   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    817   EmitUint8(0x66);
    818   EmitOptionalRex32(dst, src);
    819   EmitUint8(0x0F);
    820   EmitUint8(0x59);
    821   EmitXmmRegisterOperand(dst.LowBits(), src);
    822 }
    823 
    824 
    825 void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
    826   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    827   EmitUint8(0x66);
    828   EmitOptionalRex32(dst, src);
    829   EmitUint8(0x0F);
    830   EmitUint8(0x5E);
    831   EmitXmmRegisterOperand(dst.LowBits(), src);
    832 }
    833 
    834 
    835 void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
    836   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    837   EmitUint8(0x66);
    838   EmitOptionalRex32(dst, src);
    839   EmitUint8(0x0F);
    840   EmitUint8(0x6F);
    841   EmitXmmRegisterOperand(dst.LowBits(), src);
    842 }
    843 
    844 
    845 void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
    846   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    847   EmitUint8(0x66);
    848   EmitOptionalRex32(dst, src);
    849   EmitUint8(0x0F);
    850   EmitUint8(0x6F);
    851   EmitOperand(dst.LowBits(), src);
    852 }
    853 
    854 
    855 void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
    856   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    857   EmitUint8(0xF3);
    858   EmitOptionalRex32(dst, src);
    859   EmitUint8(0x0F);
    860   EmitUint8(0x6F);
    861   EmitOperand(dst.LowBits(), src);
    862 }
    863 
    864 
    865 void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
    866   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    867   EmitUint8(0x66);
    868   EmitOptionalRex32(src, dst);
    869   EmitUint8(0x0F);
    870   EmitUint8(0x7F);
    871   EmitOperand(src.LowBits(), dst);
    872 }
    873 
    874 
    875 void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
    876   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    877   EmitUint8(0xF3);
    878   EmitOptionalRex32(src, dst);
    879   EmitUint8(0x0F);
    880   EmitUint8(0x7F);
    881   EmitOperand(src.LowBits(), dst);
    882 }
    883 
    884 
    885 void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
    886   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    887   EmitUint8(0x66);
    888   EmitOptionalRex32(dst, src);
    889   EmitUint8(0x0F);
    890   EmitUint8(0xFC);
    891   EmitXmmRegisterOperand(dst.LowBits(), src);
    892 }
    893 
    894 
    895 void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
    896   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    897   EmitUint8(0x66);
    898   EmitOptionalRex32(dst, src);
    899   EmitUint8(0x0F);
    900   EmitUint8(0xF8);
    901   EmitXmmRegisterOperand(dst.LowBits(), src);
    902 }
    903 
    904 
    905 void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
    906   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    907   EmitUint8(0x66);
    908   EmitOptionalRex32(dst, src);
    909   EmitUint8(0x0F);
    910   EmitUint8(0xFD);
    911   EmitXmmRegisterOperand(dst.LowBits(), src);
    912 }
    913 
    914 
    915 void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
    916   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    917   EmitUint8(0x66);
    918   EmitOptionalRex32(dst, src);
    919   EmitUint8(0x0F);
    920   EmitUint8(0xF9);
    921   EmitXmmRegisterOperand(dst.LowBits(), src);
    922 }
    923 
    924 
    925 void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
    926   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    927   EmitUint8(0x66);
    928   EmitOptionalRex32(dst, src);
    929   EmitUint8(0x0F);
    930   EmitUint8(0xD5);
    931   EmitXmmRegisterOperand(dst.LowBits(), src);
    932 }
    933 
    934 
    935 void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
    936   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    937   EmitUint8(0x66);
    938   EmitOptionalRex32(dst, src);
    939   EmitUint8(0x0F);
    940   EmitUint8(0xFE);
    941   EmitXmmRegisterOperand(dst.LowBits(), src);
    942 }
    943 
    944 
    945 void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
    946   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    947   EmitUint8(0x66);
    948   EmitOptionalRex32(dst, src);
    949   EmitUint8(0x0F);
    950   EmitUint8(0xFA);
    951   EmitXmmRegisterOperand(dst.LowBits(), src);
    952 }
    953 
    954 
    955 void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
    956   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    957   EmitUint8(0x66);
    958   EmitOptionalRex32(dst, src);
    959   EmitUint8(0x0F);
    960   EmitUint8(0x38);
    961   EmitUint8(0x40);
    962   EmitXmmRegisterOperand(dst.LowBits(), src);
    963 }
    964 
    965 
    966 void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
    967   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    968   EmitUint8(0x66);
    969   EmitOptionalRex32(dst, src);
    970   EmitUint8(0x0F);
    971   EmitUint8(0xD4);
    972   EmitXmmRegisterOperand(dst.LowBits(), src);
    973 }
    974 
    975 
    976 void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
    977   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    978   EmitUint8(0x66);
    979   EmitOptionalRex32(dst, src);
    980   EmitUint8(0x0F);
    981   EmitUint8(0xFB);
    982   EmitXmmRegisterOperand(dst.LowBits(), src);
    983 }
    984 
    985 
    986 void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
    987   cvtsi2ss(dst, src, false);
    988 }
    989 
    990 
    991 void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
    992   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    993   EmitUint8(0xF3);
    994   if (is64bit) {
    995     // Emit a REX.W prefix if the operand size is 64 bits.
    996     EmitRex64(dst, src);
    997   } else {
    998     EmitOptionalRex32(dst, src);
    999   }
   1000   EmitUint8(0x0F);
   1001   EmitUint8(0x2A);
   1002   EmitOperand(dst.LowBits(), Operand(src));
   1003 }
   1004 
   1005 
   1006 void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
   1007   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1008   EmitUint8(0xF3);
   1009   if (is64bit) {
   1010     // Emit a REX.W prefix if the operand size is 64 bits.
   1011     EmitRex64(dst, src);
   1012   } else {
   1013     EmitOptionalRex32(dst, src);
   1014   }
   1015   EmitUint8(0x0F);
   1016   EmitUint8(0x2A);
   1017   EmitOperand(dst.LowBits(), src);
   1018 }
   1019 
   1020 
   1021 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
   1022   cvtsi2sd(dst, src, false);
   1023 }
   1024 
   1025 
   1026 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
   1027   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1028   EmitUint8(0xF2);
   1029   if (is64bit) {
   1030     // Emit a REX.W prefix if the operand size is 64 bits.
   1031     EmitRex64(dst, src);
   1032   } else {
   1033     EmitOptionalRex32(dst, src);
   1034   }
   1035   EmitUint8(0x0F);
   1036   EmitUint8(0x2A);
   1037   EmitOperand(dst.LowBits(), Operand(src));
   1038 }
   1039 
   1040 
   1041 void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
   1042   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1043   EmitUint8(0xF2);
   1044   if (is64bit) {
   1045     // Emit a REX.W prefix if the operand size is 64 bits.
   1046     EmitRex64(dst, src);
   1047   } else {
   1048     EmitOptionalRex32(dst, src);
   1049   }
   1050   EmitUint8(0x0F);
   1051   EmitUint8(0x2A);
   1052   EmitOperand(dst.LowBits(), src);
   1053 }
   1054 
   1055 
   1056 void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
   1057   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1058   EmitUint8(0xF3);
   1059   EmitOptionalRex32(dst, src);
   1060   EmitUint8(0x0F);
   1061   EmitUint8(0x2D);
   1062   EmitXmmRegisterOperand(dst.LowBits(), src);
   1063 }
   1064 
   1065 
   1066 void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
   1067   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1068   EmitUint8(0xF3);
   1069   EmitOptionalRex32(dst, src);
   1070   EmitUint8(0x0F);
   1071   EmitUint8(0x5A);
   1072   EmitXmmRegisterOperand(dst.LowBits(), src);
   1073 }
   1074 
   1075 
   1076 void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
   1077   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1078   EmitUint8(0xF3);
   1079   EmitOptionalRex32(dst, src);
   1080   EmitUint8(0x0F);
   1081   EmitUint8(0x5A);
   1082   EmitOperand(dst.LowBits(), src);
   1083 }
   1084 
   1085 
   1086 void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
   1087   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1088   EmitUint8(0xF2);
   1089   EmitOptionalRex32(dst, src);
   1090   EmitUint8(0x0F);
   1091   EmitUint8(0x2D);
   1092   EmitXmmRegisterOperand(dst.LowBits(), src);
   1093 }
   1094 
   1095 
   1096 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
   1097   cvttss2si(dst, src, false);
   1098 }
   1099 
   1100 
   1101 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
   1102   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1103   EmitUint8(0xF3);
   1104   if (is64bit) {
   1105     // Emit a REX.W prefix if the operand size is 64 bits.
   1106     EmitRex64(dst, src);
   1107   } else {
   1108     EmitOptionalRex32(dst, src);
   1109   }
   1110   EmitUint8(0x0F);
   1111   EmitUint8(0x2C);
   1112   EmitXmmRegisterOperand(dst.LowBits(), src);
   1113 }
   1114 
   1115 
   1116 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
   1117   cvttsd2si(dst, src, false);
   1118 }
   1119 
   1120 
   1121 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
   1122   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1123   EmitUint8(0xF2);
   1124   if (is64bit) {
   1125     // Emit a REX.W prefix if the operand size is 64 bits.
   1126     EmitRex64(dst, src);
   1127   } else {
   1128     EmitOptionalRex32(dst, src);
   1129   }
   1130   EmitUint8(0x0F);
   1131   EmitUint8(0x2C);
   1132   EmitXmmRegisterOperand(dst.LowBits(), src);
   1133 }
   1134 
   1135 
   1136 void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
   1137   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1138   EmitUint8(0xF2);
   1139   EmitOptionalRex32(dst, src);
   1140   EmitUint8(0x0F);
   1141   EmitUint8(0x5A);
   1142   EmitXmmRegisterOperand(dst.LowBits(), src);
   1143 }
   1144 
   1145 
   1146 void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
   1147   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1148   EmitUint8(0xF2);
   1149   EmitOptionalRex32(dst, src);
   1150   EmitUint8(0x0F);
   1151   EmitUint8(0x5A);
   1152   EmitOperand(dst.LowBits(), src);
   1153 }
   1154 
   1155 
   1156 void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
   1157   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1158   EmitOptionalRex32(dst, src);
   1159   EmitUint8(0x0F);
   1160   EmitUint8(0x5B);
   1161   EmitXmmRegisterOperand(dst.LowBits(), src);
   1162 }
   1163 
   1164 
   1165 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   1166   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1167   EmitUint8(0xF3);
   1168   EmitOptionalRex32(dst, src);
   1169   EmitUint8(0x0F);
   1170   EmitUint8(0xE6);
   1171   EmitXmmRegisterOperand(dst.LowBits(), src);
   1172 }
   1173 
   1174 
   1175 void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
   1176   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1177   EmitOptionalRex32(a, b);
   1178   EmitUint8(0x0F);
   1179   EmitUint8(0x2F);
   1180   EmitXmmRegisterOperand(a.LowBits(), b);
   1181 }
   1182 
   1183 
   1184 void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
   1185   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1186   EmitOptionalRex32(a, b);
   1187   EmitUint8(0x0F);
   1188   EmitUint8(0x2F);
   1189   EmitOperand(a.LowBits(), b);
   1190 }
   1191 
   1192 
   1193 void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
   1194   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1195   EmitUint8(0x66);
   1196   EmitOptionalRex32(a, b);
   1197   EmitUint8(0x0F);
   1198   EmitUint8(0x2F);
   1199   EmitXmmRegisterOperand(a.LowBits(), b);
   1200 }
   1201 
   1202 
   1203 void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
   1204   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1205   EmitUint8(0x66);
   1206   EmitOptionalRex32(a, b);
   1207   EmitUint8(0x0F);
   1208   EmitUint8(0x2F);
   1209   EmitOperand(a.LowBits(), b);
   1210 }
   1211 
   1212 
   1213 void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
   1214   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1215   EmitOptionalRex32(a, b);
   1216   EmitUint8(0x0F);
   1217   EmitUint8(0x2E);
   1218   EmitXmmRegisterOperand(a.LowBits(), b);
   1219 }
   1220 
   1221 
   1222 void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
   1223   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1224   EmitOptionalRex32(a, b);
   1225   EmitUint8(0x0F);
   1226   EmitUint8(0x2E);
   1227   EmitOperand(a.LowBits(), b);
   1228 }
   1229 
   1230 
   1231 void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
   1232   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1233   EmitUint8(0x66);
   1234   EmitOptionalRex32(a, b);
   1235   EmitUint8(0x0F);
   1236   EmitUint8(0x2E);
   1237   EmitXmmRegisterOperand(a.LowBits(), b);
   1238 }
   1239 
   1240 
   1241 void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
   1242   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1243   EmitUint8(0x66);
   1244   EmitOptionalRex32(a, b);
   1245   EmitUint8(0x0F);
   1246   EmitUint8(0x2E);
   1247   EmitOperand(a.LowBits(), b);
   1248 }
   1249 
   1250 
   1251 void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   1252   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1253   EmitUint8(0x66);
   1254   EmitOptionalRex32(dst, src);
   1255   EmitUint8(0x0F);
   1256   EmitUint8(0x3A);
   1257   EmitUint8(0x0B);
   1258   EmitXmmRegisterOperand(dst.LowBits(), src);
   1259   EmitUint8(imm.value());
   1260 }
   1261 
   1262 
   1263 void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   1264   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1265   EmitUint8(0x66);
   1266   EmitOptionalRex32(dst, src);
   1267   EmitUint8(0x0F);
   1268   EmitUint8(0x3A);
   1269   EmitUint8(0x0A);
   1270   EmitXmmRegisterOperand(dst.LowBits(), src);
   1271   EmitUint8(imm.value());
   1272 }
   1273 
   1274 
   1275 void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
   1276   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1277   EmitUint8(0xF2);
   1278   EmitOptionalRex32(dst, src);
   1279   EmitUint8(0x0F);
   1280   EmitUint8(0x51);
   1281   EmitXmmRegisterOperand(dst.LowBits(), src);
   1282 }
   1283 
   1284 
   1285 void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
   1286   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1287   EmitUint8(0xF3);
   1288   EmitOptionalRex32(dst, src);
   1289   EmitUint8(0x0F);
   1290   EmitUint8(0x51);
   1291   EmitXmmRegisterOperand(dst.LowBits(), src);
   1292 }
   1293 
   1294 
   1295 void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
   1296   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1297   EmitUint8(0x66);
   1298   EmitOptionalRex32(dst, src);
   1299   EmitUint8(0x0F);
   1300   EmitUint8(0x57);
   1301   EmitOperand(dst.LowBits(), src);
   1302 }
   1303 
   1304 
   1305 void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
   1306   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1307   EmitUint8(0x66);
   1308   EmitOptionalRex32(dst, src);
   1309   EmitUint8(0x0F);
   1310   EmitUint8(0x57);
   1311   EmitXmmRegisterOperand(dst.LowBits(), src);
   1312 }
   1313 
   1314 
   1315 void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
   1316   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1317   EmitOptionalRex32(dst, src);
   1318   EmitUint8(0x0F);
   1319   EmitUint8(0x57);
   1320   EmitOperand(dst.LowBits(), src);
   1321 }
   1322 
   1323 
   1324 void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
   1325   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1326   EmitOptionalRex32(dst, src);
   1327   EmitUint8(0x0F);
   1328   EmitUint8(0x57);
   1329   EmitXmmRegisterOperand(dst.LowBits(), src);
   1330 }
   1331 
   1332 
   1333 void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
   1334   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1335   EmitUint8(0x66);
   1336   EmitOptionalRex32(dst, src);
   1337   EmitUint8(0x0F);
   1338   EmitUint8(0xEF);
   1339   EmitXmmRegisterOperand(dst.LowBits(), src);
   1340 }
   1341 
   1342 
   1343 void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
   1344   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1345   EmitUint8(0x66);
   1346   EmitOptionalRex32(dst, src);
   1347   EmitUint8(0x0F);
   1348   EmitUint8(0x54);
   1349   EmitOperand(dst.LowBits(), src);
   1350 }
   1351 
   1352 void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
   1353   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1354   EmitUint8(0x66);
   1355   EmitOptionalRex32(dst, src);
   1356   EmitUint8(0x0F);
   1357   EmitUint8(0x54);
   1358   EmitXmmRegisterOperand(dst.LowBits(), src);
   1359 }
   1360 
   1361 void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
   1362   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1363   EmitOptionalRex32(dst, src);
   1364   EmitUint8(0x0F);
   1365   EmitUint8(0x54);
   1366   EmitXmmRegisterOperand(dst.LowBits(), src);
   1367 }
   1368 
   1369 void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
   1370   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1371   EmitUint8(0x66);
   1372   EmitOptionalRex32(dst, src);
   1373   EmitUint8(0x0F);
   1374   EmitUint8(0xDB);
   1375   EmitXmmRegisterOperand(dst.LowBits(), src);
   1376 }
   1377 
   1378 void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
   1379   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1380   EmitUint8(0x66);
   1381   EmitOptionalRex32(dst, src);
   1382   EmitUint8(0x0F);
   1383   EmitUint8(0x55);
   1384   EmitXmmRegisterOperand(dst.LowBits(), src);
   1385 }
   1386 
   1387 void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
   1388   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1389   EmitOptionalRex32(dst, src);
   1390   EmitUint8(0x0F);
   1391   EmitUint8(0x55);
   1392   EmitXmmRegisterOperand(dst.LowBits(), src);
   1393 }
   1394 
   1395 void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
   1396   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1397   EmitUint8(0x66);
   1398   EmitOptionalRex32(dst, src);
   1399   EmitUint8(0x0F);
   1400   EmitUint8(0xDF);
   1401   EmitXmmRegisterOperand(dst.LowBits(), src);
   1402 }
   1403 
   1404 void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
   1405   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1406   EmitUint8(0x66);
   1407   EmitOptionalRex32(dst, src);
   1408   EmitUint8(0x0F);
   1409   EmitUint8(0x56);
   1410   EmitXmmRegisterOperand(dst.LowBits(), src);
   1411 }
   1412 
   1413 void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
   1414   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1415   EmitOptionalRex32(dst, src);
   1416   EmitUint8(0x0F);
   1417   EmitUint8(0x56);
   1418   EmitXmmRegisterOperand(dst.LowBits(), src);
   1419 }
   1420 
   1421 void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
   1422   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1423   EmitUint8(0x66);
   1424   EmitOptionalRex32(dst, src);
   1425   EmitUint8(0x0F);
   1426   EmitUint8(0xEB);
   1427   EmitXmmRegisterOperand(dst.LowBits(), src);
   1428 }
   1429 
   1430 void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
   1431   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1432   EmitUint8(0x66);
   1433   EmitOptionalRex32(dst, src);
   1434   EmitUint8(0x0F);
   1435   EmitUint8(0xE0);
   1436   EmitXmmRegisterOperand(dst.LowBits(), src);
   1437 }
   1438 
   1439 void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
   1440   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1441   EmitUint8(0x66);
   1442   EmitOptionalRex32(dst, src);
   1443   EmitUint8(0x0F);
   1444   EmitUint8(0xE3);
   1445   EmitXmmRegisterOperand(dst.LowBits(), src);
   1446 }
   1447 
   1448 void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
   1449   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1450   EmitUint8(0x66);
   1451   EmitOptionalRex32(dst, src);
   1452   EmitUint8(0x0F);
   1453   EmitUint8(0x74);
   1454   EmitXmmRegisterOperand(dst.LowBits(), src);
   1455 }
   1456 
   1457 void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
   1458   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1459   EmitUint8(0x66);
   1460   EmitOptionalRex32(dst, src);
   1461   EmitUint8(0x0F);
   1462   EmitUint8(0x75);
   1463   EmitXmmRegisterOperand(dst.LowBits(), src);
   1464 }
   1465 
   1466 void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
   1467   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1468   EmitUint8(0x66);
   1469   EmitOptionalRex32(dst, src);
   1470   EmitUint8(0x0F);
   1471   EmitUint8(0x76);
   1472   EmitXmmRegisterOperand(dst.LowBits(), src);
   1473 }
   1474 
   1475 void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
   1476   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1477   EmitUint8(0x66);
   1478   EmitOptionalRex32(dst, src);
   1479   EmitUint8(0x0F);
   1480   EmitUint8(0x38);
   1481   EmitUint8(0x29);
   1482   EmitXmmRegisterOperand(dst.LowBits(), src);
   1483 }
   1484 
   1485 void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
   1486   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1487   EmitUint8(0x66);
   1488   EmitOptionalRex32(dst, src);
   1489   EmitUint8(0x0F);
   1490   EmitUint8(0x64);
   1491   EmitXmmRegisterOperand(dst.LowBits(), src);
   1492 }
   1493 
   1494 void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
   1495   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1496   EmitUint8(0x66);
   1497   EmitOptionalRex32(dst, src);
   1498   EmitUint8(0x0F);
   1499   EmitUint8(0x65);
   1500   EmitXmmRegisterOperand(dst.LowBits(), src);
   1501 }
   1502 
   1503 void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
   1504   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1505   EmitUint8(0x66);
   1506   EmitOptionalRex32(dst, src);
   1507   EmitUint8(0x0F);
   1508   EmitUint8(0x66);
   1509   EmitXmmRegisterOperand(dst.LowBits(), src);
   1510 }
   1511 
   1512 void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
   1513   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1514   EmitUint8(0x66);
   1515   EmitOptionalRex32(dst, src);
   1516   EmitUint8(0x0F);
   1517   EmitUint8(0x38);
   1518   EmitUint8(0x37);
   1519   EmitXmmRegisterOperand(dst.LowBits(), src);
   1520 }
   1521 
   1522 void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   1523   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1524   EmitUint8(0x66);
   1525   EmitOptionalRex32(dst, src);
   1526   EmitUint8(0x0F);
   1527   EmitUint8(0xC6);
   1528   EmitXmmRegisterOperand(dst.LowBits(), src);
   1529   EmitUint8(imm.value());
   1530 }
   1531 
   1532 
   1533 void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   1534   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1535   EmitOptionalRex32(dst, src);
   1536   EmitUint8(0x0F);
   1537   EmitUint8(0xC6);
   1538   EmitXmmRegisterOperand(dst.LowBits(), src);
   1539   EmitUint8(imm.value());
   1540 }
   1541 
   1542 
   1543 void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   1544   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1545   EmitUint8(0x66);
   1546   EmitOptionalRex32(dst, src);
   1547   EmitUint8(0x0F);
   1548   EmitUint8(0x70);
   1549   EmitXmmRegisterOperand(dst.LowBits(), src);
   1550   EmitUint8(imm.value());
   1551 }
   1552 
   1553 
   1554 void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
   1555   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1556   EmitUint8(0x66);
   1557   EmitOptionalRex32(dst, src);
   1558   EmitUint8(0x0F);
   1559   EmitUint8(0x60);
   1560   EmitXmmRegisterOperand(dst.LowBits(), src);
   1561 }
   1562 
   1563 
   1564 void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
   1565   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1566   EmitUint8(0x66);
   1567   EmitOptionalRex32(dst, src);
   1568   EmitUint8(0x0F);
   1569   EmitUint8(0x61);
   1570   EmitXmmRegisterOperand(dst.LowBits(), src);
   1571 }
   1572 
   1573 
   1574 void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
   1575   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1576   EmitUint8(0x66);
   1577   EmitOptionalRex32(dst, src);
   1578   EmitUint8(0x0F);
   1579   EmitUint8(0x62);
   1580   EmitXmmRegisterOperand(dst.LowBits(), src);
   1581 }
   1582 
   1583 
   1584 void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
   1585   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1586   EmitUint8(0x66);
   1587   EmitOptionalRex32(dst, src);
   1588   EmitUint8(0x0F);
   1589   EmitUint8(0x6C);
   1590   EmitXmmRegisterOperand(dst.LowBits(), src);
   1591 }
   1592 
   1593 
   1594 void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
   1595   DCHECK(shift_count.is_uint8());
   1596   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1597   EmitUint8(0x66);
   1598   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   1599   EmitUint8(0x0F);
   1600   EmitUint8(0x71);
   1601   EmitXmmRegisterOperand(6, reg);
   1602   EmitUint8(shift_count.value());
   1603 }
   1604 
   1605 
   1606 void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
   1607   DCHECK(shift_count.is_uint8());
   1608   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1609   EmitUint8(0x66);
   1610   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   1611   EmitUint8(0x0F);
   1612   EmitUint8(0x72);
   1613   EmitXmmRegisterOperand(6, reg);
   1614   EmitUint8(shift_count.value());
   1615 }
   1616 
   1617 
   1618 void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
   1619   DCHECK(shift_count.is_uint8());
   1620   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1621   EmitUint8(0x66);
   1622   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   1623   EmitUint8(0x0F);
   1624   EmitUint8(0x73);
   1625   EmitXmmRegisterOperand(6, reg);
   1626   EmitUint8(shift_count.value());
   1627 }
   1628 
   1629 
   1630 void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
   1631   DCHECK(shift_count.is_uint8());
   1632   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1633   EmitUint8(0x66);
   1634   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   1635   EmitUint8(0x0F);
   1636   EmitUint8(0x71);
   1637   EmitXmmRegisterOperand(4, reg);
   1638   EmitUint8(shift_count.value());
   1639 }
   1640 
   1641 
   1642 void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
   1643   DCHECK(shift_count.is_uint8());
   1644   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1645   EmitUint8(0x66);
   1646   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   1647   EmitUint8(0x0F);
   1648   EmitUint8(0x72);
   1649   EmitXmmRegisterOperand(4, reg);
   1650   EmitUint8(shift_count.value());
   1651 }
   1652 
   1653 
   1654 void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
   1655   DCHECK(shift_count.is_uint8());
   1656   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1657   EmitUint8(0x66);
   1658   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   1659   EmitUint8(0x0F);
   1660   EmitUint8(0x71);
   1661   EmitXmmRegisterOperand(2, reg);
   1662   EmitUint8(shift_count.value());
   1663 }
   1664 
   1665 
   1666 void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
   1667   DCHECK(shift_count.is_uint8());
   1668   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1669   EmitUint8(0x66);
   1670   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   1671   EmitUint8(0x0F);
   1672   EmitUint8(0x72);
   1673   EmitXmmRegisterOperand(2, reg);
   1674   EmitUint8(shift_count.value());
   1675 }
   1676 
   1677 
   1678 void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
   1679   DCHECK(shift_count.is_uint8());
   1680   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1681   EmitUint8(0x66);
   1682   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   1683   EmitUint8(0x0F);
   1684   EmitUint8(0x73);
   1685   EmitXmmRegisterOperand(2, reg);
   1686   EmitUint8(shift_count.value());
   1687 }
   1688 
   1689 
   1690 void X86_64Assembler::fldl(const Address& src) {
   1691   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1692   EmitUint8(0xDD);
   1693   EmitOperand(0, src);
   1694 }
   1695 
   1696 
   1697 void X86_64Assembler::fstl(const Address& dst) {
   1698   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1699   EmitUint8(0xDD);
   1700   EmitOperand(2, dst);
   1701 }
   1702 
   1703 
   1704 void X86_64Assembler::fstpl(const Address& dst) {
   1705   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1706   EmitUint8(0xDD);
   1707   EmitOperand(3, dst);
   1708 }
   1709 
   1710 
   1711 void X86_64Assembler::fstsw() {
   1712   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1713   EmitUint8(0x9B);
   1714   EmitUint8(0xDF);
   1715   EmitUint8(0xE0);
   1716 }
   1717 
   1718 
   1719 void X86_64Assembler::fnstcw(const Address& dst) {
   1720   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1721   EmitUint8(0xD9);
   1722   EmitOperand(7, dst);
   1723 }
   1724 
   1725 
   1726 void X86_64Assembler::fldcw(const Address& src) {
   1727   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1728   EmitUint8(0xD9);
   1729   EmitOperand(5, src);
   1730 }
   1731 
   1732 
   1733 void X86_64Assembler::fistpl(const Address& dst) {
   1734   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1735   EmitUint8(0xDF);
   1736   EmitOperand(7, dst);
   1737 }
   1738 
   1739 
   1740 void X86_64Assembler::fistps(const Address& dst) {
   1741   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1742   EmitUint8(0xDB);
   1743   EmitOperand(3, dst);
   1744 }
   1745 
   1746 
   1747 void X86_64Assembler::fildl(const Address& src) {
   1748   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1749   EmitUint8(0xDF);
   1750   EmitOperand(5, src);
   1751 }
   1752 
   1753 
   1754 void X86_64Assembler::filds(const Address& src) {
   1755   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1756   EmitUint8(0xDB);
   1757   EmitOperand(0, src);
   1758 }
   1759 
   1760 
   1761 void X86_64Assembler::fincstp() {
   1762   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1763   EmitUint8(0xD9);
   1764   EmitUint8(0xF7);
   1765 }
   1766 
   1767 
   1768 void X86_64Assembler::ffree(const Immediate& index) {
   1769   CHECK_LT(index.value(), 7);
   1770   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1771   EmitUint8(0xDD);
   1772   EmitUint8(0xC0 + index.value());
   1773 }
   1774 
   1775 
   1776 void X86_64Assembler::fsin() {
   1777   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1778   EmitUint8(0xD9);
   1779   EmitUint8(0xFE);
   1780 }
   1781 
   1782 
   1783 void X86_64Assembler::fcos() {
   1784   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1785   EmitUint8(0xD9);
   1786   EmitUint8(0xFF);
   1787 }
   1788 
   1789 
   1790 void X86_64Assembler::fptan() {
   1791   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1792   EmitUint8(0xD9);
   1793   EmitUint8(0xF2);
   1794 }
   1795 
   1796 void X86_64Assembler::fucompp() {
   1797   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1798   EmitUint8(0xDA);
   1799   EmitUint8(0xE9);
   1800 }
   1801 
   1802 
   1803 void X86_64Assembler::fprem() {
   1804   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1805   EmitUint8(0xD9);
   1806   EmitUint8(0xF8);
   1807 }
   1808 
   1809 
   1810 void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
   1811   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1812   // There is a short version for rax.
   1813   // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
   1814   // work.
   1815   const bool src_rax = src.AsRegister() == RAX;
   1816   const bool dst_rax = dst.AsRegister() == RAX;
   1817   if (src_rax || dst_rax) {
   1818     EmitOptionalRex32(src_rax ? dst : src);
   1819     EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
   1820     return;
   1821   }
   1822 
   1823   // General case.
   1824   EmitOptionalRex32(src, dst);
   1825   EmitUint8(0x87);
   1826   EmitRegisterOperand(src.LowBits(), dst.LowBits());
   1827 }
   1828 
   1829 
   1830 void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
   1831   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1832   // There is a short version for rax.
   1833   // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
   1834   // work.
   1835   const bool src_rax = src.AsRegister() == RAX;
   1836   const bool dst_rax = dst.AsRegister() == RAX;
   1837   if (src_rax || dst_rax) {
   1838     // If src == target, emit a nop instead.
   1839     if (src_rax && dst_rax) {
   1840       EmitUint8(0x90);
   1841     } else {
   1842       EmitRex64(src_rax ? dst : src);
   1843       EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
   1844     }
   1845     return;
   1846   }
   1847 
   1848   // General case.
   1849   EmitRex64(src, dst);
   1850   EmitUint8(0x87);
   1851   EmitRegisterOperand(src.LowBits(), dst.LowBits());
   1852 }
   1853 
   1854 
   1855 void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
   1856   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1857   EmitOptionalRex32(reg, address);
   1858   EmitUint8(0x87);
   1859   EmitOperand(reg.LowBits(), address);
   1860 }
   1861 
   1862 
   1863 void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
   1864   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1865   CHECK(imm.is_int32());
   1866   EmitOptionalRex32(address);
   1867   EmitUint8(0x80);
   1868   EmitOperand(7, address);
   1869   EmitUint8(imm.value() & 0xFF);
   1870 }
   1871 
   1872 
   1873 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   1874   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1875   CHECK(imm.is_int32());
   1876   EmitOperandSizeOverride();
   1877   EmitOptionalRex32(address);
   1878   EmitComplex(7, address, imm);
   1879 }
   1880 
   1881 
   1882 void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
   1883   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1884   CHECK(imm.is_int32());
   1885   EmitOptionalRex32(reg);
   1886   EmitComplex(7, Operand(reg), imm);
   1887 }
   1888 
   1889 
   1890 void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
   1891   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1892   EmitOptionalRex32(reg0, reg1);
   1893   EmitUint8(0x3B);
   1894   EmitOperand(reg0.LowBits(), Operand(reg1));
   1895 }
   1896 
   1897 
   1898 void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
   1899   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1900   EmitOptionalRex32(reg, address);
   1901   EmitUint8(0x3B);
   1902   EmitOperand(reg.LowBits(), address);
   1903 }
   1904 
   1905 
   1906 void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
   1907   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1908   EmitOptionalRex32(reg, address);
   1909   EmitUint8(0x39);
   1910   EmitOperand(reg.LowBits(), address);
   1911 }
   1912 
   1913 
   1914 void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
   1915   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1916   CHECK(imm.is_int32());
   1917   EmitOptionalRex32(address);
   1918   EmitComplex(7, address, imm);
   1919 }
   1920 
   1921 
   1922 void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
   1923   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1924   EmitRex64(reg0, reg1);
   1925   EmitUint8(0x3B);
   1926   EmitOperand(reg0.LowBits(), Operand(reg1));
   1927 }
   1928 
   1929 
   1930 void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
   1931   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1932   CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
   1933   EmitRex64(reg);
   1934   EmitComplex(7, Operand(reg), imm);
   1935 }
   1936 
   1937 
   1938 void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
   1939   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1940   EmitRex64(reg, address);
   1941   EmitUint8(0x3B);
   1942   EmitOperand(reg.LowBits(), address);
   1943 }
   1944 
   1945 
   1946 void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
   1947   CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
   1948   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1949   EmitRex64(address);
   1950   EmitComplex(7, address, imm);
   1951 }
   1952 
   1953 
   1954 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
   1955   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1956   EmitOptionalRex32(dst, src);
   1957   EmitUint8(0x03);
   1958   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   1959 }
   1960 
   1961 
   1962 void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
   1963   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1964   EmitOptionalRex32(reg, address);
   1965   EmitUint8(0x03);
   1966   EmitOperand(reg.LowBits(), address);
   1967 }
   1968 
   1969 
   1970 void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
   1971   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1972   EmitOptionalRex32(reg1, reg2);
   1973   EmitUint8(0x85);
   1974   EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
   1975 }
   1976 
   1977 
   1978 void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
   1979   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1980   EmitOptionalRex32(reg, address);
   1981   EmitUint8(0x85);
   1982   EmitOperand(reg.LowBits(), address);
   1983 }
   1984 
   1985 
   1986 void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
   1987   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   1988   // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
   1989   // we only test the byte CpuRegister to keep the encoding short.
   1990   if (immediate.is_uint8() && reg.AsRegister() < 4) {
   1991     // Use zero-extended 8-bit immediate.
   1992     if (reg.AsRegister() == RAX) {
   1993       EmitUint8(0xA8);
   1994     } else {
   1995       EmitUint8(0xF6);
   1996       EmitUint8(0xC0 + reg.AsRegister());
   1997     }
   1998     EmitUint8(immediate.value() & 0xFF);
   1999   } else if (reg.AsRegister() == RAX) {
   2000     // Use short form if the destination is RAX.
   2001     EmitUint8(0xA9);
   2002     EmitImmediate(immediate);
   2003   } else {
   2004     EmitOptionalRex32(reg);
   2005     EmitUint8(0xF7);
   2006     EmitOperand(0, Operand(reg));
   2007     EmitImmediate(immediate);
   2008   }
   2009 }
   2010 
   2011 
   2012 void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
   2013   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2014   EmitRex64(reg1, reg2);
   2015   EmitUint8(0x85);
   2016   EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
   2017 }
   2018 
   2019 
   2020 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
   2021   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2022   EmitRex64(reg, address);
   2023   EmitUint8(0x85);
   2024   EmitOperand(reg.LowBits(), address);
   2025 }
   2026 
   2027 
   2028 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
   2029   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2030   EmitOptionalRex32(dst);
   2031   EmitUint8(0xF6);
   2032   EmitOperand(Register::RAX, dst);
   2033   CHECK(imm.is_int8());
   2034   EmitUint8(imm.value() & 0xFF);
   2035 }
   2036 
   2037 
   2038 void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
   2039   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2040   EmitOptionalRex32(dst);
   2041   EmitUint8(0xF7);
   2042   EmitOperand(0, dst);
   2043   EmitImmediate(imm);
   2044 }
   2045 
   2046 
   2047 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
   2048   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2049   EmitOptionalRex32(dst, src);
   2050   EmitUint8(0x23);
   2051   EmitOperand(dst.LowBits(), Operand(src));
   2052 }
   2053 
   2054 
   2055 void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
   2056   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2057   EmitOptionalRex32(reg, address);
   2058   EmitUint8(0x23);
   2059   EmitOperand(reg.LowBits(), address);
   2060 }
   2061 
   2062 
   2063 void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
   2064   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2065   EmitOptionalRex32(dst);
   2066   EmitComplex(4, Operand(dst), imm);
   2067 }
   2068 
   2069 
   2070 void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
   2071   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2072   CHECK(imm.is_int32());  // andq only supports 32b immediate.
   2073   EmitRex64(reg);
   2074   EmitComplex(4, Operand(reg), imm);
   2075 }
   2076 
   2077 
   2078 void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
   2079   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2080   EmitRex64(dst, src);
   2081   EmitUint8(0x23);
   2082   EmitOperand(dst.LowBits(), Operand(src));
   2083 }
   2084 
   2085 
   2086 void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
   2087   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2088   EmitRex64(dst, src);
   2089   EmitUint8(0x23);
   2090   EmitOperand(dst.LowBits(), src);
   2091 }
   2092 
   2093 
   2094 void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
   2095   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2096   EmitOptionalRex32(dst, src);
   2097   EmitUint8(0x0B);
   2098   EmitOperand(dst.LowBits(), Operand(src));
   2099 }
   2100 
   2101 
   2102 void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
   2103   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2104   EmitOptionalRex32(reg, address);
   2105   EmitUint8(0x0B);
   2106   EmitOperand(reg.LowBits(), address);
   2107 }
   2108 
   2109 
   2110 void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
   2111   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2112   EmitOptionalRex32(dst);
   2113   EmitComplex(1, Operand(dst), imm);
   2114 }
   2115 
   2116 
   2117 void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
   2118   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2119   CHECK(imm.is_int32());  // orq only supports 32b immediate.
   2120   EmitRex64(dst);
   2121   EmitComplex(1, Operand(dst), imm);
   2122 }
   2123 
   2124 
   2125 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
   2126   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2127   EmitRex64(dst, src);
   2128   EmitUint8(0x0B);
   2129   EmitOperand(dst.LowBits(), Operand(src));
   2130 }
   2131 
   2132 
   2133 void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
   2134   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2135   EmitRex64(dst, src);
   2136   EmitUint8(0x0B);
   2137   EmitOperand(dst.LowBits(), src);
   2138 }
   2139 
   2140 
   2141 void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
   2142   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2143   EmitOptionalRex32(dst, src);
   2144   EmitUint8(0x33);
   2145   EmitOperand(dst.LowBits(), Operand(src));
   2146 }
   2147 
   2148 
   2149 void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
   2150   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2151   EmitOptionalRex32(reg, address);
   2152   EmitUint8(0x33);
   2153   EmitOperand(reg.LowBits(), address);
   2154 }
   2155 
   2156 
   2157 void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
   2158   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2159   EmitOptionalRex32(dst);
   2160   EmitComplex(6, Operand(dst), imm);
   2161 }
   2162 
   2163 
   2164 void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
   2165   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2166   EmitRex64(dst, src);
   2167   EmitUint8(0x33);
   2168   EmitOperand(dst.LowBits(), Operand(src));
   2169 }
   2170 
   2171 
   2172 void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
   2173   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2174   CHECK(imm.is_int32());  // xorq only supports 32b immediate.
   2175   EmitRex64(dst);
   2176   EmitComplex(6, Operand(dst), imm);
   2177 }
   2178 
   2179 void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
   2180   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2181   EmitRex64(dst, src);
   2182   EmitUint8(0x33);
   2183   EmitOperand(dst.LowBits(), src);
   2184 }
   2185 
   2186 
// Everything between "#if 0" and the matching "#endif" is excluded from compilation.
// It holds an alternative set of REX-prefix helpers that take raw Register pointers.
#if 0
// Builds a REX prefix byte from the requested bits and emits it if it is non-zero.
// `force` requests a bare 0x40 prefix; `w` sets REX.W.  For each of `r`, `x`, `b` that
// is non-null and names a register in [R8, kNumberOfCpuRegisters), the corresponding
// REX bit is set and the register is rewritten in place to its value minus 8.
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// Register/memory variant: starts from the REX bits reported by the memory operand,
// then adds the forced prefix, REX.W and REX.R (for `dst`) the same way as rex() above.
void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  // NOTE(review): `mem` is a reference, yet it is dereferenced with `->` here; confirm
  // this compiles (or change it to `mem.rex()`) before re-enabling this block.
  uint8_t rex = mem->rex();
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// Declaration only; no definition is present in this disabled block.
void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif
   2239 
   2240 void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
   2241   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2242   EmitOptionalRex32(reg);
   2243   EmitComplex(0, Operand(reg), imm);
   2244 }
   2245 
   2246 
   2247 void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
   2248   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2249   CHECK(imm.is_int32());  // addq only supports 32b immediate.
   2250   EmitRex64(reg);
   2251   EmitComplex(0, Operand(reg), imm);
   2252 }
   2253 
   2254 
   2255 void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
   2256   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2257   EmitRex64(dst, address);
   2258   EmitUint8(0x03);
   2259   EmitOperand(dst.LowBits(), address);
   2260 }
   2261 
   2262 
   2263 void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
   2264   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2265   // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
   2266   EmitRex64(src, dst);
   2267   EmitUint8(0x01);
   2268   EmitRegisterOperand(src.LowBits(), dst.LowBits());
   2269 }
   2270 
   2271 
   2272 void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
   2273   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2274   EmitOptionalRex32(reg, address);
   2275   EmitUint8(0x01);
   2276   EmitOperand(reg.LowBits(), address);
   2277 }
   2278 
   2279 
   2280 void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
   2281   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2282   EmitOptionalRex32(address);
   2283   EmitComplex(0, address, imm);
   2284 }
   2285 
   2286 
   2287 void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
   2288   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2289   EmitOptionalRex32(dst, src);
   2290   EmitUint8(0x2B);
   2291   EmitOperand(dst.LowBits(), Operand(src));
   2292 }
   2293 
   2294 
   2295 void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
   2296   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2297   EmitOptionalRex32(reg);
   2298   EmitComplex(5, Operand(reg), imm);
   2299 }
   2300 
   2301 
   2302 void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
   2303   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2304   CHECK(imm.is_int32());  // subq only supports 32b immediate.
   2305   EmitRex64(reg);
   2306   EmitComplex(5, Operand(reg), imm);
   2307 }
   2308 
   2309 
   2310 void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
   2311   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2312   EmitRex64(dst, src);
   2313   EmitUint8(0x2B);
   2314   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   2315 }
   2316 
   2317 
   2318 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
   2319   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2320   EmitRex64(reg, address);
   2321   EmitUint8(0x2B);
   2322   EmitOperand(reg.LowBits() & 7, address);
   2323 }
   2324 
   2325 
   2326 void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
   2327   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2328   EmitOptionalRex32(reg, address);
   2329   EmitUint8(0x2B);
   2330   EmitOperand(reg.LowBits(), address);
   2331 }
   2332 
   2333 
   2334 void X86_64Assembler::cdq() {
   2335   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2336   EmitUint8(0x99);
   2337 }
   2338 
   2339 
   2340 void X86_64Assembler::cqo() {
   2341   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2342   EmitRex64();
   2343   EmitUint8(0x99);
   2344 }
   2345 
   2346 
   2347 void X86_64Assembler::idivl(CpuRegister reg) {
   2348   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2349   EmitOptionalRex32(reg);
   2350   EmitUint8(0xF7);
   2351   EmitUint8(0xF8 | reg.LowBits());
   2352 }
   2353 
   2354 
   2355 void X86_64Assembler::idivq(CpuRegister reg) {
   2356   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2357   EmitRex64(reg);
   2358   EmitUint8(0xF7);
   2359   EmitUint8(0xF8 | reg.LowBits());
   2360 }
   2361 
   2362 
   2363 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
   2364   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2365   EmitOptionalRex32(dst, src);
   2366   EmitUint8(0x0F);
   2367   EmitUint8(0xAF);
   2368   EmitOperand(dst.LowBits(), Operand(src));
   2369 }
   2370 
   2371 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
   2372   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2373   CHECK(imm.is_int32());  // imull only supports 32b immediate.
   2374 
   2375   EmitOptionalRex32(dst, src);
   2376 
   2377   // See whether imm can be represented as a sign-extended 8bit value.
   2378   int32_t v32 = static_cast<int32_t>(imm.value());
   2379   if (IsInt<8>(v32)) {
   2380     // Sign-extension works.
   2381     EmitUint8(0x6B);
   2382     EmitOperand(dst.LowBits(), Operand(src));
   2383     EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
   2384   } else {
   2385     // Not representable, use full immediate.
   2386     EmitUint8(0x69);
   2387     EmitOperand(dst.LowBits(), Operand(src));
   2388     EmitImmediate(imm);
   2389   }
   2390 }
   2391 
   2392 
   2393 void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
   2394   imull(reg, reg, imm);
   2395 }
   2396 
   2397 
   2398 void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
   2399   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2400   EmitOptionalRex32(reg, address);
   2401   EmitUint8(0x0F);
   2402   EmitUint8(0xAF);
   2403   EmitOperand(reg.LowBits(), address);
   2404 }
   2405 
   2406 
   2407 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
   2408   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2409   EmitRex64(dst, src);
   2410   EmitUint8(0x0F);
   2411   EmitUint8(0xAF);
   2412   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   2413 }
   2414 
   2415 
   2416 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
   2417   imulq(reg, reg, imm);
   2418 }
   2419 
   2420 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
   2421   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2422   CHECK(imm.is_int32());  // imulq only supports 32b immediate.
   2423 
   2424   EmitRex64(dst, reg);
   2425 
   2426   // See whether imm can be represented as a sign-extended 8bit value.
   2427   int64_t v64 = imm.value();
   2428   if (IsInt<8>(v64)) {
   2429     // Sign-extension works.
   2430     EmitUint8(0x6B);
   2431     EmitOperand(dst.LowBits(), Operand(reg));
   2432     EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
   2433   } else {
   2434     // Not representable, use full immediate.
   2435     EmitUint8(0x69);
   2436     EmitOperand(dst.LowBits(), Operand(reg));
   2437     EmitImmediate(imm);
   2438   }
   2439 }
   2440 
   2441 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
   2442   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2443   EmitRex64(reg, address);
   2444   EmitUint8(0x0F);
   2445   EmitUint8(0xAF);
   2446   EmitOperand(reg.LowBits(), address);
   2447 }
   2448 
   2449 
   2450 void X86_64Assembler::imull(CpuRegister reg) {
   2451   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2452   EmitOptionalRex32(reg);
   2453   EmitUint8(0xF7);
   2454   EmitOperand(5, Operand(reg));
   2455 }
   2456 
   2457 
   2458 void X86_64Assembler::imulq(CpuRegister reg) {
   2459   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2460   EmitRex64(reg);
   2461   EmitUint8(0xF7);
   2462   EmitOperand(5, Operand(reg));
   2463 }
   2464 
   2465 
   2466 void X86_64Assembler::imull(const Address& address) {
   2467   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2468   EmitOptionalRex32(address);
   2469   EmitUint8(0xF7);
   2470   EmitOperand(5, address);
   2471 }
   2472 
   2473 
   2474 void X86_64Assembler::mull(CpuRegister reg) {
   2475   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2476   EmitOptionalRex32(reg);
   2477   EmitUint8(0xF7);
   2478   EmitOperand(4, Operand(reg));
   2479 }
   2480 
   2481 
   2482 void X86_64Assembler::mull(const Address& address) {
   2483   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2484   EmitOptionalRex32(address);
   2485   EmitUint8(0xF7);
   2486   EmitOperand(4, address);
   2487 }
   2488 
   2489 
   2490 void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
   2491   EmitGenericShift(false, 4, reg, imm);
   2492 }
   2493 
   2494 
   2495 void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
   2496   EmitGenericShift(true, 4, reg, imm);
   2497 }
   2498 
   2499 
   2500 void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
   2501   EmitGenericShift(false, 4, operand, shifter);
   2502 }
   2503 
   2504 
   2505 void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
   2506   EmitGenericShift(true, 4, operand, shifter);
   2507 }
   2508 
   2509 
   2510 void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
   2511   EmitGenericShift(false, 5, reg, imm);
   2512 }
   2513 
   2514 
   2515 void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
   2516   EmitGenericShift(true, 5, reg, imm);
   2517 }
   2518 
   2519 
   2520 void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
   2521   EmitGenericShift(false, 5, operand, shifter);
   2522 }
   2523 
   2524 
   2525 void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
   2526   EmitGenericShift(true, 5, operand, shifter);
   2527 }
   2528 
   2529 
   2530 void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
   2531   EmitGenericShift(false, 7, reg, imm);
   2532 }
   2533 
   2534 
   2535 void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
   2536   EmitGenericShift(false, 7, operand, shifter);
   2537 }
   2538 
   2539 
   2540 void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
   2541   EmitGenericShift(true, 7, reg, imm);
   2542 }
   2543 
   2544 
   2545 void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
   2546   EmitGenericShift(true, 7, operand, shifter);
   2547 }
   2548 
   2549 
   2550 void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
   2551   EmitGenericShift(false, 0, reg, imm);
   2552 }
   2553 
   2554 
   2555 void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
   2556   EmitGenericShift(false, 0, operand, shifter);
   2557 }
   2558 
   2559 
   2560 void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
   2561   EmitGenericShift(false, 1, reg, imm);
   2562 }
   2563 
   2564 
   2565 void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
   2566   EmitGenericShift(false, 1, operand, shifter);
   2567 }
   2568 
   2569 
   2570 void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
   2571   EmitGenericShift(true, 0, reg, imm);
   2572 }
   2573 
   2574 
   2575 void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
   2576   EmitGenericShift(true, 0, operand, shifter);
   2577 }
   2578 
   2579 
   2580 void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
   2581   EmitGenericShift(true, 1, reg, imm);
   2582 }
   2583 
   2584 
   2585 void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
   2586   EmitGenericShift(true, 1, operand, shifter);
   2587 }
   2588 
   2589 
   2590 void X86_64Assembler::negl(CpuRegister reg) {
   2591   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2592   EmitOptionalRex32(reg);
   2593   EmitUint8(0xF7);
   2594   EmitOperand(3, Operand(reg));
   2595 }
   2596 
   2597 
   2598 void X86_64Assembler::negq(CpuRegister reg) {
   2599   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2600   EmitRex64(reg);
   2601   EmitUint8(0xF7);
   2602   EmitOperand(3, Operand(reg));
   2603 }
   2604 
   2605 
   2606 void X86_64Assembler::notl(CpuRegister reg) {
   2607   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2608   EmitOptionalRex32(reg);
   2609   EmitUint8(0xF7);
   2610   EmitUint8(0xD0 | reg.LowBits());
   2611 }
   2612 
   2613 
   2614 void X86_64Assembler::notq(CpuRegister reg) {
   2615   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2616   EmitRex64(reg);
   2617   EmitUint8(0xF7);
   2618   EmitOperand(2, Operand(reg));
   2619 }
   2620 
   2621 
   2622 void X86_64Assembler::enter(const Immediate& imm) {
   2623   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2624   EmitUint8(0xC8);
   2625   CHECK(imm.is_uint16()) << imm.value();
   2626   EmitUint8(imm.value() & 0xFF);
   2627   EmitUint8((imm.value() >> 8) & 0xFF);
   2628   EmitUint8(0x00);
   2629 }
   2630 
   2631 
   2632 void X86_64Assembler::leave() {
   2633   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2634   EmitUint8(0xC9);
   2635 }
   2636 
   2637 
   2638 void X86_64Assembler::ret() {
   2639   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2640   EmitUint8(0xC3);
   2641 }
   2642 
   2643 
   2644 void X86_64Assembler::ret(const Immediate& imm) {
   2645   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2646   EmitUint8(0xC2);
   2647   CHECK(imm.is_uint16());
   2648   EmitUint8(imm.value() & 0xFF);
   2649   EmitUint8((imm.value() >> 8) & 0xFF);
   2650 }
   2651 
   2652 
   2653 
   2654 void X86_64Assembler::nop() {
   2655   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2656   EmitUint8(0x90);
   2657 }
   2658 
   2659 
   2660 void X86_64Assembler::int3() {
   2661   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2662   EmitUint8(0xCC);
   2663 }
   2664 
   2665 
   2666 void X86_64Assembler::hlt() {
   2667   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2668   EmitUint8(0xF4);
   2669 }
   2670 
   2671 
   2672 void X86_64Assembler::j(Condition condition, Label* label) {
   2673   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2674   if (label->IsBound()) {
   2675     static const int kShortSize = 2;
   2676     static const int kLongSize = 6;
   2677     int offset = label->Position() - buffer_.Size();
   2678     CHECK_LE(offset, 0);
   2679     if (IsInt<8>(offset - kShortSize)) {
   2680       EmitUint8(0x70 + condition);
   2681       EmitUint8((offset - kShortSize) & 0xFF);
   2682     } else {
   2683       EmitUint8(0x0F);
   2684       EmitUint8(0x80 + condition);
   2685       EmitInt32(offset - kLongSize);
   2686     }
   2687   } else {
   2688     EmitUint8(0x0F);
   2689     EmitUint8(0x80 + condition);
   2690     EmitLabelLink(label);
   2691   }
   2692 }
   2693 
   2694 
   2695 void X86_64Assembler::j(Condition condition, NearLabel* label) {
   2696   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2697   if (label->IsBound()) {
   2698     static const int kShortSize = 2;
   2699     int offset = label->Position() - buffer_.Size();
   2700     CHECK_LE(offset, 0);
   2701     CHECK(IsInt<8>(offset - kShortSize));
   2702     EmitUint8(0x70 + condition);
   2703     EmitUint8((offset - kShortSize) & 0xFF);
   2704   } else {
   2705     EmitUint8(0x70 + condition);
   2706     EmitLabelLink(label);
   2707   }
   2708 }
   2709 
   2710 
   2711 void X86_64Assembler::jrcxz(NearLabel* label) {
   2712   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2713   if (label->IsBound()) {
   2714     static const int kShortSize = 2;
   2715     int offset = label->Position() - buffer_.Size();
   2716     CHECK_LE(offset, 0);
   2717     CHECK(IsInt<8>(offset - kShortSize));
   2718     EmitUint8(0xE3);
   2719     EmitUint8((offset - kShortSize) & 0xFF);
   2720   } else {
   2721     EmitUint8(0xE3);
   2722     EmitLabelLink(label);
   2723   }
   2724 }
   2725 
   2726 
   2727 void X86_64Assembler::jmp(CpuRegister reg) {
   2728   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2729   EmitOptionalRex32(reg);
   2730   EmitUint8(0xFF);
   2731   EmitRegisterOperand(4, reg.LowBits());
   2732 }
   2733 
   2734 void X86_64Assembler::jmp(const Address& address) {
   2735   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2736   EmitOptionalRex32(address);
   2737   EmitUint8(0xFF);
   2738   EmitOperand(4, address);
   2739 }
   2740 
   2741 void X86_64Assembler::jmp(Label* label) {
   2742   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2743   if (label->IsBound()) {
   2744     static const int kShortSize = 2;
   2745     static const int kLongSize = 5;
   2746     int offset = label->Position() - buffer_.Size();
   2747     CHECK_LE(offset, 0);
   2748     if (IsInt<8>(offset - kShortSize)) {
   2749       EmitUint8(0xEB);
   2750       EmitUint8((offset - kShortSize) & 0xFF);
   2751     } else {
   2752       EmitUint8(0xE9);
   2753       EmitInt32(offset - kLongSize);
   2754     }
   2755   } else {
   2756     EmitUint8(0xE9);
   2757     EmitLabelLink(label);
   2758   }
   2759 }
   2760 
   2761 
   2762 void X86_64Assembler::jmp(NearLabel* label) {
   2763   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2764   if (label->IsBound()) {
   2765     static const int kShortSize = 2;
   2766     int offset = label->Position() - buffer_.Size();
   2767     CHECK_LE(offset, 0);
   2768     CHECK(IsInt<8>(offset - kShortSize));
   2769     EmitUint8(0xEB);
   2770     EmitUint8((offset - kShortSize) & 0xFF);
   2771   } else {
   2772     EmitUint8(0xEB);
   2773     EmitLabelLink(label);
   2774   }
   2775 }
   2776 
   2777 
   2778 void X86_64Assembler::rep_movsw() {
   2779   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2780   EmitUint8(0x66);
   2781   EmitUint8(0xF3);
   2782   EmitUint8(0xA5);
   2783 }
   2784 
   2785 
   2786 X86_64Assembler* X86_64Assembler::lock() {
   2787   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2788   EmitUint8(0xF0);
   2789   return this;
   2790 }
   2791 
   2792 
   2793 void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
   2794   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2795   EmitOptionalRex32(reg, address);
   2796   EmitUint8(0x0F);
   2797   EmitUint8(0xB1);
   2798   EmitOperand(reg.LowBits(), address);
   2799 }
   2800 
   2801 
   2802 void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
   2803   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2804   EmitRex64(reg, address);
   2805   EmitUint8(0x0F);
   2806   EmitUint8(0xB1);
   2807   EmitOperand(reg.LowBits(), address);
   2808 }
   2809 
   2810 
   2811 void X86_64Assembler::mfence() {
   2812   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2813   EmitUint8(0x0F);
   2814   EmitUint8(0xAE);
   2815   EmitUint8(0xF0);
   2816 }
   2817 
   2818 
   2819 X86_64Assembler* X86_64Assembler::gs() {
   2820   // TODO: gs is a prefix and not an instruction
   2821   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2822   EmitUint8(0x65);
   2823   return this;
   2824 }
   2825 
   2826 
   2827 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
   2828   int value = imm.value();
   2829   if (value != 0) {
   2830     if (value > 0) {
   2831       addl(reg, imm);
   2832     } else {
   2833       subl(reg, Immediate(value));
   2834     }
   2835   }
   2836 }
   2837 
   2838 
   2839 void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
   2840   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2841   // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
   2842   if (dst.NeedsRex() || dst.AsRegister() > 3) {
   2843     EmitOptionalRex(true, false, false, false, dst.NeedsRex());
   2844   }
   2845   EmitUint8(0x0F);
   2846   EmitUint8(0x90 + condition);
   2847   EmitUint8(0xC0 + dst.LowBits());
   2848 }
   2849 
   2850 void X86_64Assembler::bswapl(CpuRegister dst) {
   2851   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2852   EmitOptionalRex(false, false, false, false, dst.NeedsRex());
   2853   EmitUint8(0x0F);
   2854   EmitUint8(0xC8 + dst.LowBits());
   2855 }
   2856 
   2857 void X86_64Assembler::bswapq(CpuRegister dst) {
   2858   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2859   EmitOptionalRex(false, true, false, false, dst.NeedsRex());
   2860   EmitUint8(0x0F);
   2861   EmitUint8(0xC8 + dst.LowBits());
   2862 }
   2863 
   2864 void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
   2865   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2866   EmitOptionalRex32(dst, src);
   2867   EmitUint8(0x0F);
   2868   EmitUint8(0xBC);
   2869   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   2870 }
   2871 
   2872 void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
   2873   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2874   EmitOptionalRex32(dst, src);
   2875   EmitUint8(0x0F);
   2876   EmitUint8(0xBC);
   2877   EmitOperand(dst.LowBits(), src);
   2878 }
   2879 
   2880 void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
   2881   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2882   EmitRex64(dst, src);
   2883   EmitUint8(0x0F);
   2884   EmitUint8(0xBC);
   2885   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   2886 }
   2887 
   2888 void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
   2889   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2890   EmitRex64(dst, src);
   2891   EmitUint8(0x0F);
   2892   EmitUint8(0xBC);
   2893   EmitOperand(dst.LowBits(), src);
   2894 }
   2895 
   2896 void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
   2897   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2898   EmitOptionalRex32(dst, src);
   2899   EmitUint8(0x0F);
   2900   EmitUint8(0xBD);
   2901   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   2902 }
   2903 
   2904 void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
   2905   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2906   EmitOptionalRex32(dst, src);
   2907   EmitUint8(0x0F);
   2908   EmitUint8(0xBD);
   2909   EmitOperand(dst.LowBits(), src);
   2910 }
   2911 
   2912 void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
   2913   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2914   EmitRex64(dst, src);
   2915   EmitUint8(0x0F);
   2916   EmitUint8(0xBD);
   2917   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   2918 }
   2919 
   2920 void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
   2921   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2922   EmitRex64(dst, src);
   2923   EmitUint8(0x0F);
   2924   EmitUint8(0xBD);
   2925   EmitOperand(dst.LowBits(), src);
   2926 }
   2927 
   2928 void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
   2929   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2930   EmitUint8(0xF3);
   2931   EmitOptionalRex32(dst, src);
   2932   EmitUint8(0x0F);
   2933   EmitUint8(0xB8);
   2934   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   2935 }
   2936 
   2937 void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
   2938   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2939   EmitUint8(0xF3);
   2940   EmitOptionalRex32(dst, src);
   2941   EmitUint8(0x0F);
   2942   EmitUint8(0xB8);
   2943   EmitOperand(dst.LowBits(), src);
   2944 }
   2945 
   2946 void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
   2947   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2948   EmitUint8(0xF3);
   2949   EmitRex64(dst, src);
   2950   EmitUint8(0x0F);
   2951   EmitUint8(0xB8);
   2952   EmitRegisterOperand(dst.LowBits(), src.LowBits());
   2953 }
   2954 
   2955 void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
   2956   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2957   EmitUint8(0xF3);
   2958   EmitRex64(dst, src);
   2959   EmitUint8(0x0F);
   2960   EmitUint8(0xB8);
   2961   EmitOperand(dst.LowBits(), src);
   2962 }
   2963 
   2964 void X86_64Assembler::repne_scasb() {
   2965   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2966   EmitUint8(0xF2);
   2967   EmitUint8(0xAE);
   2968 }
   2969 
   2970 void X86_64Assembler::repne_scasw() {
   2971   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2972   EmitUint8(0x66);
   2973   EmitUint8(0xF2);
   2974   EmitUint8(0xAF);
   2975 }
   2976 
   2977 void X86_64Assembler::repe_cmpsw() {
   2978   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2979   EmitUint8(0x66);
   2980   EmitUint8(0xF3);
   2981   EmitUint8(0xA7);
   2982 }
   2983 
   2984 
   2985 void X86_64Assembler::repe_cmpsl() {
   2986   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2987   EmitUint8(0xF3);
   2988   EmitUint8(0xA7);
   2989 }
   2990 
   2991 
   2992 void X86_64Assembler::repe_cmpsq() {
   2993   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   2994   EmitUint8(0xF3);
   2995   EmitRex64();
   2996   EmitUint8(0xA7);
   2997 }
   2998 
   2999 
   3000 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
   3001   // TODO: Need to have a code constants table.
   3002   int64_t constant = bit_cast<int64_t, double>(value);
   3003   pushq(Immediate(High32Bits(constant)));
   3004   pushq(Immediate(Low32Bits(constant)));
   3005   movsd(dst, Address(CpuRegister(RSP), 0));
   3006   addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
   3007 }
   3008 
   3009 
   3010 void X86_64Assembler::Align(int alignment, int offset) {
   3011   CHECK(IsPowerOfTwo(alignment));
   3012   // Emit nop instruction until the real position is aligned.
   3013   while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
   3014     nop();
   3015   }
   3016 }
   3017 
   3018 
   3019 void X86_64Assembler::Bind(Label* label) {
   3020   int bound = buffer_.Size();
   3021   CHECK(!label->IsBound());  // Labels can only be bound once.
   3022   while (label->IsLinked()) {
   3023     int position = label->LinkPosition();
   3024     int next = buffer_.Load<int32_t>(position);
   3025     buffer_.Store<int32_t>(position, bound - (position + 4));
   3026     label->position_ = next;
   3027   }
   3028   label->BindTo(bound);
   3029 }
   3030 
   3031 
   3032 void X86_64Assembler::Bind(NearLabel* label) {
   3033   int bound = buffer_.Size();
   3034   CHECK(!label->IsBound());  // Labels can only be bound once.
   3035   while (label->IsLinked()) {
   3036     int position = label->LinkPosition();
   3037     uint8_t delta = buffer_.Load<uint8_t>(position);
   3038     int offset = bound - (position + 1);
   3039     CHECK(IsInt<8>(offset));
   3040     buffer_.Store<int8_t>(position, offset);
   3041     label->position_ = delta != 0u ? label->position_ - delta : 0;
   3042   }
   3043   label->BindTo(bound);
   3044 }
   3045 
   3046 
   3047 void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
   3048   CHECK_GE(reg_or_opcode, 0);
   3049   CHECK_LT(reg_or_opcode, 8);
   3050   const int length = operand.length_;
   3051   CHECK_GT(length, 0);
   3052   // Emit the ModRM byte updated with the given reg value.
   3053   CHECK_EQ(operand.encoding_[0] & 0x38, 0);
   3054   EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
   3055   // Emit the rest of the encoded operand.
   3056   for (int i = 1; i < length; i++) {
   3057     EmitUint8(operand.encoding_[i]);
   3058   }
   3059   AssemblerFixup* fixup = operand.GetFixup();
   3060   if (fixup != nullptr) {
   3061     EmitFixup(fixup);
   3062   }
   3063 }
   3064 
   3065 
   3066 void X86_64Assembler::EmitImmediate(const Immediate& imm) {
   3067   if (imm.is_int32()) {
   3068     EmitInt32(static_cast<int32_t>(imm.value()));
   3069   } else {
   3070     EmitInt64(imm.value());
   3071   }
   3072 }
   3073 
   3074 
   3075 void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
   3076                                   const Operand& operand,
   3077                                   const Immediate& immediate) {
   3078   CHECK_GE(reg_or_opcode, 0);
   3079   CHECK_LT(reg_or_opcode, 8);
   3080   if (immediate.is_int8()) {
   3081     // Use sign-extended 8-bit immediate.
   3082     EmitUint8(0x83);
   3083     EmitOperand(reg_or_opcode, operand);
   3084     EmitUint8(immediate.value() & 0xFF);
   3085   } else if (operand.IsRegister(CpuRegister(RAX))) {
   3086     // Use short form if the destination is eax.
   3087     EmitUint8(0x05 + (reg_or_opcode << 3));
   3088     EmitImmediate(immediate);
   3089   } else {
   3090     EmitUint8(0x81);
   3091     EmitOperand(reg_or_opcode, operand);
   3092     EmitImmediate(immediate);
   3093   }
   3094 }
   3095 
   3096 
   3097 void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
   3098   if (label->IsBound()) {
   3099     int offset = label->Position() - buffer_.Size();
   3100     CHECK_LE(offset, 0);
   3101     EmitInt32(offset - instruction_size);
   3102   } else {
   3103     EmitLabelLink(label);
   3104   }
   3105 }
   3106 
   3107 
   3108 void X86_64Assembler::EmitLabelLink(Label* label) {
   3109   CHECK(!label->IsBound());
   3110   int position = buffer_.Size();
   3111   EmitInt32(label->position_);
   3112   label->LinkTo(position);
   3113 }
   3114 
   3115 
   3116 void X86_64Assembler::EmitLabelLink(NearLabel* label) {
   3117   CHECK(!label->IsBound());
   3118   int position = buffer_.Size();
   3119   if (label->IsLinked()) {
   3120     // Save the delta in the byte that we have to play with.
   3121     uint32_t delta = position - label->LinkPosition();
   3122     CHECK(IsUint<8>(delta));
   3123     EmitUint8(delta & 0xFF);
   3124   } else {
   3125     EmitUint8(0);
   3126   }
   3127   label->LinkTo(position);
   3128 }
   3129 
   3130 
   3131 void X86_64Assembler::EmitGenericShift(bool wide,
   3132                                        int reg_or_opcode,
   3133                                        CpuRegister reg,
   3134                                        const Immediate& imm) {
   3135   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   3136   CHECK(imm.is_int8());
   3137   if (wide) {
   3138     EmitRex64(reg);
   3139   } else {
   3140     EmitOptionalRex32(reg);
   3141   }
   3142   if (imm.value() == 1) {
   3143     EmitUint8(0xD1);
   3144     EmitOperand(reg_or_opcode, Operand(reg));
   3145   } else {
   3146     EmitUint8(0xC1);
   3147     EmitOperand(reg_or_opcode, Operand(reg));
   3148     EmitUint8(imm.value() & 0xFF);
   3149   }
   3150 }
   3151 
   3152 
   3153 void X86_64Assembler::EmitGenericShift(bool wide,
   3154                                        int reg_or_opcode,
   3155                                        CpuRegister operand,
   3156                                        CpuRegister shifter) {
   3157   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   3158   CHECK_EQ(shifter.AsRegister(), RCX);
   3159   if (wide) {
   3160     EmitRex64(operand);
   3161   } else {
   3162     EmitOptionalRex32(operand);
   3163   }
   3164   EmitUint8(0xD3);
   3165   EmitOperand(reg_or_opcode, Operand(operand));
   3166 }
   3167 
   3168 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
   3169   // REX.WRXB
   3170   // W - 64-bit operand
   3171   // R - MODRM.reg
   3172   // X - SIB.index
   3173   // B - MODRM.rm/SIB.base
   3174   uint8_t rex = force ? 0x40 : 0;
   3175   if (w) {
   3176     rex |= 0x48;  // REX.W000
   3177   }
   3178   if (r) {
   3179     rex |= 0x44;  // REX.0R00
   3180   }
   3181   if (x) {
   3182     rex |= 0x42;  // REX.00X0
   3183   }
   3184   if (b) {
   3185     rex |= 0x41;  // REX.000B
   3186   }
   3187   if (rex != 0) {
   3188     EmitUint8(rex);
   3189   }
   3190 }
   3191 
   3192 void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
   3193   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
   3194 }
   3195 
   3196 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
   3197   EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
   3198 }
   3199 
   3200 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
   3201   EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
   3202 }
   3203 
   3204 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
   3205   EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
   3206 }
   3207 
   3208 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
   3209   EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
   3210 }
   3211 
   3212 void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
   3213   uint8_t rex = operand.rex();
   3214   if (rex != 0) {
   3215     EmitUint8(rex);
   3216   }
   3217 }
   3218 
   3219 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
   3220   uint8_t rex = operand.rex();
   3221   if (dst.NeedsRex()) {
   3222     rex |= 0x44;  // REX.0R00
   3223   }
   3224   if (rex != 0) {
   3225     EmitUint8(rex);
   3226   }
   3227 }
   3228 
   3229 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
   3230   uint8_t rex = operand.rex();
   3231   if (dst.NeedsRex()) {
   3232     rex |= 0x44;  // REX.0R00
   3233   }
   3234   if (rex != 0) {
   3235     EmitUint8(rex);
   3236   }
   3237 }
   3238 
   3239 void X86_64Assembler::EmitRex64() {
   3240   EmitOptionalRex(false, true, false, false, false);
   3241 }
   3242 
   3243 void X86_64Assembler::EmitRex64(CpuRegister reg) {
   3244   EmitOptionalRex(false, true, false, false, reg.NeedsRex());
   3245 }
   3246 
   3247 void X86_64Assembler::EmitRex64(const Operand& operand) {
   3248   uint8_t rex = operand.rex();
   3249   rex |= 0x48;  // REX.W000
   3250   EmitUint8(rex);
   3251 }
   3252 
   3253 void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
   3254   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
   3255 }
   3256 
   3257 void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
   3258   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
   3259 }
   3260 
   3261 void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
   3262   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
   3263 }
   3264 
   3265 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
   3266   uint8_t rex = 0x48 | operand.rex();  // REX.W000
   3267   if (dst.NeedsRex()) {
   3268     rex |= 0x44;  // REX.0R00
   3269   }
   3270   EmitUint8(rex);
   3271 }
   3272 
   3273 void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
   3274   uint8_t rex = 0x48 | operand.rex();  // REX.W000
   3275   if (dst.NeedsRex()) {
   3276     rex |= 0x44;  // REX.0R00
   3277   }
   3278   EmitUint8(rex);
   3279 }
   3280 
   3281 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
   3282   // For src, SPL, BPL, SIL, DIL need the rex prefix.
   3283   bool force = src.AsRegister() > 3;
   3284   EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
   3285 }
   3286 
   3287 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
   3288   uint8_t rex = operand.rex();
   3289   // For dst, SPL, BPL, SIL, DIL need the rex prefix.
   3290   bool force = dst.AsRegister() > 3;
   3291   if (force) {
   3292     rex |= 0x40;  // REX.0000
   3293   }
   3294   if (dst.NeedsRex()) {
   3295     rex |= 0x44;  // REX.0R00
   3296   }
   3297   if (rex != 0) {
   3298     EmitUint8(rex);
   3299   }
   3300 }
   3301 
   3302 void X86_64Assembler::AddConstantArea() {
   3303   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   3304   for (size_t i = 0, e = area.size(); i < e; i++) {
   3305     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   3306     EmitInt32(area[i]);
   3307   }
   3308 }
   3309 
   3310 size_t ConstantArea::AppendInt32(int32_t v) {
   3311   size_t result = buffer_.size() * elem_size_;
   3312   buffer_.push_back(v);
   3313   return result;
   3314 }
   3315 
   3316 size_t ConstantArea::AddInt32(int32_t v) {
   3317   // Look for an existing match.
   3318   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
   3319     if (v == buffer_[i]) {
   3320       return i * elem_size_;
   3321     }
   3322   }
   3323 
   3324   // Didn't match anything.
   3325   return AppendInt32(v);
   3326 }
   3327 
   3328 size_t ConstantArea::AddInt64(int64_t v) {
   3329   int32_t v_low = v;
   3330   int32_t v_high = v >> 32;
   3331   if (buffer_.size() > 1) {
   3332     // Ensure we don't pass the end of the buffer.
   3333     for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
   3334       if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
   3335         return i * elem_size_;
   3336       }
   3337     }
   3338   }
   3339 
   3340   // Didn't match anything.
   3341   size_t result = buffer_.size() * elem_size_;
   3342   buffer_.push_back(v_low);
   3343   buffer_.push_back(v_high);
   3344   return result;
   3345 }
   3346 
   3347 size_t ConstantArea::AddDouble(double v) {
   3348   // Treat the value as a 64-bit integer value.
   3349   return AddInt64(bit_cast<int64_t, double>(v));
   3350 }
   3351 
   3352 size_t ConstantArea::AddFloat(float v) {
   3353   // Treat the value as a 32-bit integer value.
   3354   return AddInt32(bit_cast<int32_t, float>(v));
   3355 }
   3356 
   3357 }  // namespace x86_64
   3358 }  // namespace art
   3359