Home | History | Annotate | Download | only in x86_64
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
     18 #define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
     19 
     20 #include <vector>
     21 
     22 #include "base/arena_containers.h"
     23 #include "base/array_ref.h"
     24 #include "base/bit_utils.h"
     25 #include "base/macros.h"
     26 #include "constants_x86_64.h"
     27 #include "globals.h"
     28 #include "heap_poisoning.h"
     29 #include "managed_register_x86_64.h"
     30 #include "offsets.h"
     31 #include "utils/assembler.h"
     32 #include "utils/jni_macro_assembler.h"
     33 
     34 namespace art {
     35 namespace x86_64 {
     36 
     37 // Encodes an immediate value for operands.
     38 //
     39 // Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
     40 // to 32b.
     41 //
     42 // Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
     43 // conversion rules in expressions regarding negation, especially size_t on 32b.
     44 class Immediate : public ValueObject {
     45  public:
     46   explicit Immediate(int64_t value_in) : value_(value_in) {}
     47 
     48   int64_t value() const { return value_; }
     49 
     50   bool is_int8() const { return IsInt<8>(value_); }
     51   bool is_uint8() const { return IsUint<8>(value_); }
     52   bool is_int16() const { return IsInt<16>(value_); }
     53   bool is_uint16() const { return IsUint<16>(value_); }
     54   bool is_int32() const { return IsInt<32>(value_); }
     55 
     56  private:
     57   const int64_t value_;
     58 };
     59 
     60 
     61 class Operand : public ValueObject {
     62  public:
     63   uint8_t mod() const {
     64     return (encoding_at(0) >> 6) & 3;
     65   }
     66 
     67   Register rm() const {
     68     return static_cast<Register>(encoding_at(0) & 7);
     69   }
     70 
     71   ScaleFactor scale() const {
     72     return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
     73   }
     74 
     75   Register index() const {
     76     return static_cast<Register>((encoding_at(1) >> 3) & 7);
     77   }
     78 
     79   Register base() const {
     80     return static_cast<Register>(encoding_at(1) & 7);
     81   }
     82 
     83   CpuRegister cpu_rm() const {
     84     int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
     85     return static_cast<CpuRegister>(rm() + ext);
     86   }
     87 
     88   CpuRegister cpu_index() const {
     89     int ext = (rex_ & 2) != 0 ? x86_64::R8 : x86_64::RAX;
     90     return static_cast<CpuRegister>(index() + ext);
     91   }
     92 
     93   CpuRegister cpu_base() const {
     94     int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
     95     return static_cast<CpuRegister>(base() + ext);
     96   }
     97 
     98   uint8_t rex() const {
     99     return rex_;
    100   }
    101 
    102   int8_t disp8() const {
    103     CHECK_GE(length_, 2);
    104     return static_cast<int8_t>(encoding_[length_ - 1]);
    105   }
    106 
    107   int32_t disp32() const {
    108     CHECK_GE(length_, 5);
    109     int32_t value;
    110     memcpy(&value, &encoding_[length_ - 4], sizeof(value));
    111     return value;
    112   }
    113 
    114   bool IsRegister(CpuRegister reg) const {
    115     return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
    116         && ((encoding_[0] & 0x07) == reg.LowBits())  // Register codes match.
    117         && (reg.NeedsRex() == ((rex_ & 1) != 0));  // REX.000B bits match.
    118   }
    119 
    120   AssemblerFixup* GetFixup() const {
    121     return fixup_;
    122   }
    123 
    124  protected:
    125   // Operand can be sub classed (e.g: Address).
    126   Operand() : rex_(0), length_(0), fixup_(nullptr) { }
    127 
    128   void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
    129     CHECK_EQ(mod_in & ~3, 0);
    130     if (rm_in.NeedsRex()) {
    131       rex_ |= 0x41;  // REX.000B
    132     }
    133     encoding_[0] = (mod_in << 6) | rm_in.LowBits();
    134     length_ = 1;
    135   }
    136 
    137   void SetSIB(ScaleFactor scale_in, CpuRegister index_in, CpuRegister base_in) {
    138     CHECK_EQ(length_, 1);
    139     CHECK_EQ(scale_in & ~3, 0);
    140     if (base_in.NeedsRex()) {
    141       rex_ |= 0x41;  // REX.000B
    142     }
    143     if (index_in.NeedsRex()) {
    144       rex_ |= 0x42;  // REX.00X0
    145     }
    146     encoding_[1] = (scale_in << 6) | (static_cast<uint8_t>(index_in.LowBits()) << 3) |
    147         static_cast<uint8_t>(base_in.LowBits());
    148     length_ = 2;
    149   }
    150 
    151   void SetDisp8(int8_t disp) {
    152     CHECK(length_ == 1 || length_ == 2);
    153     encoding_[length_++] = static_cast<uint8_t>(disp);
    154   }
    155 
    156   void SetDisp32(int32_t disp) {
    157     CHECK(length_ == 1 || length_ == 2);
    158     int disp_size = sizeof(disp);
    159     memmove(&encoding_[length_], &disp, disp_size);
    160     length_ += disp_size;
    161   }
    162 
    163   void SetFixup(AssemblerFixup* fixup) {
    164     fixup_ = fixup;
    165   }
    166 
    167  private:
    168   uint8_t rex_;
    169   uint8_t length_;
    170   uint8_t encoding_[6];
    171   AssemblerFixup* fixup_;
    172 
    173   explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
    174 
    175   // Get the operand encoding byte at the given index.
    176   uint8_t encoding_at(int index_in) const {
    177     CHECK_GE(index_in, 0);
    178     CHECK_LT(index_in, length_);
    179     return encoding_[index_in];
    180   }
    181 
    182   friend class X86_64Assembler;
    183 };
    184 
    185 
    186 class Address : public Operand {
    187  public:
    188   Address(CpuRegister base_in, int32_t disp) {
    189     Init(base_in, disp);
    190   }
    191 
    192   Address(CpuRegister base_in, Offset disp) {
    193     Init(base_in, disp.Int32Value());
    194   }
    195 
    196   Address(CpuRegister base_in, FrameOffset disp) {
    197     CHECK_EQ(base_in.AsRegister(), RSP);
    198     Init(CpuRegister(RSP), disp.Int32Value());
    199   }
    200 
    201   Address(CpuRegister base_in, MemberOffset disp) {
    202     Init(base_in, disp.Int32Value());
    203   }
    204 
    205   void Init(CpuRegister base_in, int32_t disp) {
    206     if (disp == 0 && base_in.LowBits() != RBP) {
    207       SetModRM(0, base_in);
    208       if (base_in.LowBits() == RSP) {
    209         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
    210       }
    211     } else if (disp >= -128 && disp <= 127) {
    212       SetModRM(1, base_in);
    213       if (base_in.LowBits() == RSP) {
    214         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
    215       }
    216       SetDisp8(disp);
    217     } else {
    218       SetModRM(2, base_in);
    219       if (base_in.LowBits() == RSP) {
    220         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
    221       }
    222       SetDisp32(disp);
    223     }
    224   }
    225 
    226 
    227   Address(CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
    228     CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
    229     SetModRM(0, CpuRegister(RSP));
    230     SetSIB(scale_in, index_in, CpuRegister(RBP));
    231     SetDisp32(disp);
    232   }
    233 
    234   Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
    235     CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
    236     if (disp == 0 && base_in.LowBits() != RBP) {
    237       SetModRM(0, CpuRegister(RSP));
    238       SetSIB(scale_in, index_in, base_in);
    239     } else if (disp >= -128 && disp <= 127) {
    240       SetModRM(1, CpuRegister(RSP));
    241       SetSIB(scale_in, index_in, base_in);
    242       SetDisp8(disp);
    243     } else {
    244       SetModRM(2, CpuRegister(RSP));
    245       SetSIB(scale_in, index_in, base_in);
    246       SetDisp32(disp);
    247     }
    248   }
    249 
    250   // If no_rip is true then the Absolute address isn't RIP relative.
    251   static Address Absolute(uintptr_t addr, bool no_rip = false) {
    252     Address result;
    253     if (no_rip) {
    254       result.SetModRM(0, CpuRegister(RSP));
    255       result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
    256       result.SetDisp32(addr);
    257     } else {
    258       // RIP addressing is done using RBP as the base register.
    259       // The value in RBP isn't used.  Instead the offset is added to RIP.
    260       result.SetModRM(0, CpuRegister(RBP));
    261       result.SetDisp32(addr);
    262     }
    263     return result;
    264   }
    265 
    266   // An RIP relative address that will be fixed up later.
    267   static Address RIP(AssemblerFixup* fixup) {
    268     Address result;
    269     // RIP addressing is done using RBP as the base register.
    270     // The value in RBP isn't used.  Instead the offset is added to RIP.
    271     result.SetModRM(0, CpuRegister(RBP));
    272     result.SetDisp32(0);
    273     result.SetFixup(fixup);
    274     return result;
    275   }
    276 
    277   // If no_rip is true then the Absolute address isn't RIP relative.
    278   static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
    279     return Absolute(addr.Int32Value(), no_rip);
    280   }
    281 
    282  private:
    283   Address() {}
    284 };
    285 
    286 std::ostream& operator<<(std::ostream& os, const Address& addr);
    287 
    288 /**
    289  * Class to handle constant area values.
    290  */
    291 class ConstantArea {
    292  public:
    293   explicit ConstantArea(ArenaAllocator* allocator)
    294       : buffer_(allocator->Adapter(kArenaAllocAssembler)) {}
    295 
    296   // Add a double to the constant area, returning the offset into
    297   // the constant area where the literal resides.
    298   size_t AddDouble(double v);
    299 
    300   // Add a float to the constant area, returning the offset into
    301   // the constant area where the literal resides.
    302   size_t AddFloat(float v);
    303 
    304   // Add an int32_t to the constant area, returning the offset into
    305   // the constant area where the literal resides.
    306   size_t AddInt32(int32_t v);
    307 
    308   // Add an int32_t to the end of the constant area, returning the offset into
    309   // the constant area where the literal resides.
    310   size_t AppendInt32(int32_t v);
    311 
    312   // Add an int64_t to the constant area, returning the offset into
    313   // the constant area where the literal resides.
    314   size_t AddInt64(int64_t v);
    315 
    316   size_t GetSize() const {
    317     return buffer_.size() * elem_size_;
    318   }
    319 
    320   ArrayRef<const int32_t> GetBuffer() const {
    321     return ArrayRef<const int32_t>(buffer_);
    322   }
    323 
    324  private:
    325   static constexpr size_t elem_size_ = sizeof(int32_t);
    326   ArenaVector<int32_t> buffer_;
    327 };
    328 
    329 
    330 // This is equivalent to the Label class, used in a slightly different context. We
    331 // inherit the functionality of the Label class, but prevent unintended
    332 // derived-to-base conversions by making the base class private.
    333 class NearLabel : private Label {
    334  public:
    335   NearLabel() : Label() {}
    336 
    337   // Expose the Label routines that we need.
    338   using Label::Position;
    339   using Label::LinkPosition;
    340   using Label::IsBound;
    341   using Label::IsUnused;
    342   using Label::IsLinked;
    343 
    344  private:
    345   using Label::BindTo;
    346   using Label::LinkTo;
    347 
    348   friend class x86_64::X86_64Assembler;
    349 
    350   DISALLOW_COPY_AND_ASSIGN(NearLabel);
    351 };
    352 
    353 
    354 class X86_64Assembler FINAL : public Assembler {
    355  public:
    356   explicit X86_64Assembler(ArenaAllocator* allocator)
    357       : Assembler(allocator), constant_area_(allocator) {}
    358   virtual ~X86_64Assembler() {}
    359 
    360   /*
    361    * Emit Machine Instructions.
    362    */
    363   void call(CpuRegister reg);
    364   void call(const Address& address);
    365   void call(Label* label);
    366 
    367   void pushq(CpuRegister reg);
    368   void pushq(const Address& address);
    369   void pushq(const Immediate& imm);
    370 
    371   void popq(CpuRegister reg);
    372   void popq(const Address& address);
    373 
    374   void movq(CpuRegister dst, const Immediate& src);
    375   void movl(CpuRegister dst, const Immediate& src);
    376   void movq(CpuRegister dst, CpuRegister src);
    377   void movl(CpuRegister dst, CpuRegister src);
    378 
    379   void movntl(const Address& dst, CpuRegister src);
    380   void movntq(const Address& dst, CpuRegister src);
    381 
    382   void movq(CpuRegister dst, const Address& src);
    383   void movl(CpuRegister dst, const Address& src);
    384   void movq(const Address& dst, CpuRegister src);
    385   void movq(const Address& dst, const Immediate& imm);
    386   void movl(const Address& dst, CpuRegister src);
    387   void movl(const Address& dst, const Immediate& imm);
    388 
    389   void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
    390   void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
    391   void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);
    392 
    393   void movzxb(CpuRegister dst, CpuRegister src);
    394   void movzxb(CpuRegister dst, const Address& src);
    395   void movsxb(CpuRegister dst, CpuRegister src);
    396   void movsxb(CpuRegister dst, const Address& src);
    397   void movb(CpuRegister dst, const Address& src);
    398   void movb(const Address& dst, CpuRegister src);
    399   void movb(const Address& dst, const Immediate& imm);
    400 
    401   void movzxw(CpuRegister dst, CpuRegister src);
    402   void movzxw(CpuRegister dst, const Address& src);
    403   void movsxw(CpuRegister dst, CpuRegister src);
    404   void movsxw(CpuRegister dst, const Address& src);
    405   void movw(CpuRegister dst, const Address& src);
    406   void movw(const Address& dst, CpuRegister src);
    407   void movw(const Address& dst, const Immediate& imm);
    408 
    409   void leaq(CpuRegister dst, const Address& src);
    410   void leal(CpuRegister dst, const Address& src);
    411 
    412   void movaps(XmmRegister dst, XmmRegister src);     // move
    413   void movaps(XmmRegister dst, const Address& src);  // load aligned
    414   void movups(XmmRegister dst, const Address& src);  // load unaligned
    415   void movaps(const Address& dst, XmmRegister src);  // store aligned
    416   void movups(const Address& dst, XmmRegister src);  // store unaligned
    417 
    418   void movss(XmmRegister dst, const Address& src);
    419   void movss(const Address& dst, XmmRegister src);
    420   void movss(XmmRegister dst, XmmRegister src);
    421 
    422   void movsxd(CpuRegister dst, CpuRegister src);
    423   void movsxd(CpuRegister dst, const Address& src);
    424 
    425   void movd(XmmRegister dst, CpuRegister src);  // Note: this is the r64 version, formally movq.
    426   void movd(CpuRegister dst, XmmRegister src);  // Note: this is the r64 version, formally movq.
    427   void movd(XmmRegister dst, CpuRegister src, bool is64bit);
    428   void movd(CpuRegister dst, XmmRegister src, bool is64bit);
    429 
    430   void addss(XmmRegister dst, XmmRegister src);
    431   void addss(XmmRegister dst, const Address& src);
    432   void subss(XmmRegister dst, XmmRegister src);
    433   void subss(XmmRegister dst, const Address& src);
    434   void mulss(XmmRegister dst, XmmRegister src);
    435   void mulss(XmmRegister dst, const Address& src);
    436   void divss(XmmRegister dst, XmmRegister src);
    437   void divss(XmmRegister dst, const Address& src);
    438 
    439   void addps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    440   void subps(XmmRegister dst, XmmRegister src);
    441   void mulps(XmmRegister dst, XmmRegister src);
    442   void divps(XmmRegister dst, XmmRegister src);
    443 
    444   void movapd(XmmRegister dst, XmmRegister src);     // move
    445   void movapd(XmmRegister dst, const Address& src);  // load aligned
    446   void movupd(XmmRegister dst, const Address& src);  // load unaligned
    447   void movapd(const Address& dst, XmmRegister src);  // store aligned
    448   void movupd(const Address& dst, XmmRegister src);  // store unaligned
    449 
    450   void movsd(XmmRegister dst, const Address& src);
    451   void movsd(const Address& dst, XmmRegister src);
    452   void movsd(XmmRegister dst, XmmRegister src);
    453 
    454   void addsd(XmmRegister dst, XmmRegister src);
    455   void addsd(XmmRegister dst, const Address& src);
    456   void subsd(XmmRegister dst, XmmRegister src);
    457   void subsd(XmmRegister dst, const Address& src);
    458   void mulsd(XmmRegister dst, XmmRegister src);
    459   void mulsd(XmmRegister dst, const Address& src);
    460   void divsd(XmmRegister dst, XmmRegister src);
    461   void divsd(XmmRegister dst, const Address& src);
    462 
    463   void addpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    464   void subpd(XmmRegister dst, XmmRegister src);
    465   void mulpd(XmmRegister dst, XmmRegister src);
    466   void divpd(XmmRegister dst, XmmRegister src);
    467 
    468   void movdqa(XmmRegister dst, XmmRegister src);     // move
    469   void movdqa(XmmRegister dst, const Address& src);  // load aligned
    470   void movdqu(XmmRegister dst, const Address& src);  // load unaligned
    471   void movdqa(const Address& dst, XmmRegister src);  // store aligned
    472   void movdqu(const Address& dst, XmmRegister src);  // store unaligned
    473 
    474   void paddb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    475   void psubb(XmmRegister dst, XmmRegister src);
    476 
    477   void paddw(XmmRegister dst, XmmRegister src);
    478   void psubw(XmmRegister dst, XmmRegister src);
    479   void pmullw(XmmRegister dst, XmmRegister src);
    480 
    481   void paddd(XmmRegister dst, XmmRegister src);
    482   void psubd(XmmRegister dst, XmmRegister src);
    483   void pmulld(XmmRegister dst, XmmRegister src);
    484 
    485   void paddq(XmmRegister dst, XmmRegister src);
    486   void psubq(XmmRegister dst, XmmRegister src);
    487 
    488   void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
    489   void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
    490   void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
    491   void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
    492   void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
    493   void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);
    494 
    495   void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
    496   void cvtss2sd(XmmRegister dst, XmmRegister src);
    497   void cvtss2sd(XmmRegister dst, const Address& src);
    498 
    499   void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
    500   void cvtsd2ss(XmmRegister dst, XmmRegister src);
    501   void cvtsd2ss(XmmRegister dst, const Address& src);
    502 
    503   void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
    504   void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
    505   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
    506   void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
    507 
    508   void cvtdq2ps(XmmRegister dst, XmmRegister src);
    509   void cvtdq2pd(XmmRegister dst, XmmRegister src);
    510 
    511   void comiss(XmmRegister a, XmmRegister b);
    512   void comiss(XmmRegister a, const Address& b);
    513   void comisd(XmmRegister a, XmmRegister b);
    514   void comisd(XmmRegister a, const Address& b);
    515   void ucomiss(XmmRegister a, XmmRegister b);
    516   void ucomiss(XmmRegister a, const Address& b);
    517   void ucomisd(XmmRegister a, XmmRegister b);
    518   void ucomisd(XmmRegister a, const Address& b);
    519 
    520   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
    521   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
    522 
    523   void sqrtsd(XmmRegister dst, XmmRegister src);
    524   void sqrtss(XmmRegister dst, XmmRegister src);
    525 
    526   void xorpd(XmmRegister dst, const Address& src);
    527   void xorpd(XmmRegister dst, XmmRegister src);
    528   void xorps(XmmRegister dst, const Address& src);
    529   void xorps(XmmRegister dst, XmmRegister src);
    530   void pxor(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    531 
    532   void andpd(XmmRegister dst, const Address& src);
    533   void andpd(XmmRegister dst, XmmRegister src);
    534   void andps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    535   void pand(XmmRegister dst, XmmRegister src);
    536 
    537   void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    538   void andnps(XmmRegister dst, XmmRegister src);
    539   void pandn(XmmRegister dst, XmmRegister src);
    540 
    541   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    542   void orps(XmmRegister dst, XmmRegister src);
    543   void por(XmmRegister dst, XmmRegister src);
    544 
    545   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    546   void pavgw(XmmRegister dst, XmmRegister src);
    547   void psadbw(XmmRegister dst, XmmRegister src);
    548   void pmaddwd(XmmRegister dst, XmmRegister src);
    549   void phaddw(XmmRegister dst, XmmRegister src);
    550   void phaddd(XmmRegister dst, XmmRegister src);
    551   void haddps(XmmRegister dst, XmmRegister src);
    552   void haddpd(XmmRegister dst, XmmRegister src);
    553   void phsubw(XmmRegister dst, XmmRegister src);
    554   void phsubd(XmmRegister dst, XmmRegister src);
    555   void hsubps(XmmRegister dst, XmmRegister src);
    556   void hsubpd(XmmRegister dst, XmmRegister src);
    557 
    558   void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    559   void pmaxsb(XmmRegister dst, XmmRegister src);
    560   void pminsw(XmmRegister dst, XmmRegister src);
    561   void pmaxsw(XmmRegister dst, XmmRegister src);
    562   void pminsd(XmmRegister dst, XmmRegister src);
    563   void pmaxsd(XmmRegister dst, XmmRegister src);
    564 
    565   void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    566   void pmaxub(XmmRegister dst, XmmRegister src);
    567   void pminuw(XmmRegister dst, XmmRegister src);
    568   void pmaxuw(XmmRegister dst, XmmRegister src);
    569   void pminud(XmmRegister dst, XmmRegister src);
    570   void pmaxud(XmmRegister dst, XmmRegister src);
    571 
    572   void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    573   void maxps(XmmRegister dst, XmmRegister src);
    574   void minpd(XmmRegister dst, XmmRegister src);
    575   void maxpd(XmmRegister dst, XmmRegister src);
    576 
    577   void pcmpeqb(XmmRegister dst, XmmRegister src);
    578   void pcmpeqw(XmmRegister dst, XmmRegister src);
    579   void pcmpeqd(XmmRegister dst, XmmRegister src);
    580   void pcmpeqq(XmmRegister dst, XmmRegister src);
    581 
    582   void pcmpgtb(XmmRegister dst, XmmRegister src);
    583   void pcmpgtw(XmmRegister dst, XmmRegister src);
    584   void pcmpgtd(XmmRegister dst, XmmRegister src);
    585   void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
    586 
    587   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
    588   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
    589   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
    590 
    591   void punpcklbw(XmmRegister dst, XmmRegister src);
    592   void punpcklwd(XmmRegister dst, XmmRegister src);
    593   void punpckldq(XmmRegister dst, XmmRegister src);
    594   void punpcklqdq(XmmRegister dst, XmmRegister src);
    595 
    596   void punpckhbw(XmmRegister dst, XmmRegister src);
    597   void punpckhwd(XmmRegister dst, XmmRegister src);
    598   void punpckhdq(XmmRegister dst, XmmRegister src);
    599   void punpckhqdq(XmmRegister dst, XmmRegister src);
    600 
    601   void psllw(XmmRegister reg, const Immediate& shift_count);
    602   void pslld(XmmRegister reg, const Immediate& shift_count);
    603   void psllq(XmmRegister reg, const Immediate& shift_count);
    604 
    605   void psraw(XmmRegister reg, const Immediate& shift_count);
    606   void psrad(XmmRegister reg, const Immediate& shift_count);
    607   // no psraq
    608 
    609   void psrlw(XmmRegister reg, const Immediate& shift_count);
    610   void psrld(XmmRegister reg, const Immediate& shift_count);
    611   void psrlq(XmmRegister reg, const Immediate& shift_count);
    612   void psrldq(XmmRegister reg, const Immediate& shift_count);
    613 
    614   void flds(const Address& src);
    615   void fstps(const Address& dst);
    616   void fsts(const Address& dst);
    617 
    618   void fldl(const Address& src);
    619   void fstpl(const Address& dst);
    620   void fstl(const Address& dst);
    621 
    622   void fstsw();
    623 
    624   void fucompp();
    625 
    626   void fnstcw(const Address& dst);
    627   void fldcw(const Address& src);
    628 
    629   void fistpl(const Address& dst);
    630   void fistps(const Address& dst);
    631   void fildl(const Address& src);
    632   void filds(const Address& src);
    633 
    634   void fincstp();
    635   void ffree(const Immediate& index);
    636 
    637   void fsin();
    638   void fcos();
    639   void fptan();
    640   void fprem();
    641 
    642   void xchgl(CpuRegister dst, CpuRegister src);
    643   void xchgq(CpuRegister dst, CpuRegister src);
    644   void xchgl(CpuRegister reg, const Address& address);
    645 
    646   void cmpb(const Address& address, const Immediate& imm);
    647   void cmpw(const Address& address, const Immediate& imm);
    648 
    649   void cmpl(CpuRegister reg, const Immediate& imm);
    650   void cmpl(CpuRegister reg0, CpuRegister reg1);
    651   void cmpl(CpuRegister reg, const Address& address);
    652   void cmpl(const Address& address, CpuRegister reg);
    653   void cmpl(const Address& address, const Immediate& imm);
    654 
    655   void cmpq(CpuRegister reg0, CpuRegister reg1);
    656   void cmpq(CpuRegister reg0, const Immediate& imm);
    657   void cmpq(CpuRegister reg0, const Address& address);
    658   void cmpq(const Address& address, const Immediate& imm);
    659 
    660   void testl(CpuRegister reg1, CpuRegister reg2);
    661   void testl(CpuRegister reg, const Address& address);
    662   void testl(CpuRegister reg, const Immediate& imm);
    663 
    664   void testq(CpuRegister reg1, CpuRegister reg2);
    665   void testq(CpuRegister reg, const Address& address);
    666 
    667   void testb(const Address& address, const Immediate& imm);
    668   void testl(const Address& address, const Immediate& imm);
    669 
    670   void andl(CpuRegister dst, const Immediate& imm);
    671   void andl(CpuRegister dst, CpuRegister src);
    672   void andl(CpuRegister reg, const Address& address);
    673   void andq(CpuRegister dst, const Immediate& imm);
    674   void andq(CpuRegister dst, CpuRegister src);
    675   void andq(CpuRegister reg, const Address& address);
    676 
    677   void orl(CpuRegister dst, const Immediate& imm);
    678   void orl(CpuRegister dst, CpuRegister src);
    679   void orl(CpuRegister reg, const Address& address);
    680   void orq(CpuRegister dst, CpuRegister src);
    681   void orq(CpuRegister dst, const Immediate& imm);
    682   void orq(CpuRegister reg, const Address& address);
    683 
    684   void xorl(CpuRegister dst, CpuRegister src);
    685   void xorl(CpuRegister dst, const Immediate& imm);
    686   void xorl(CpuRegister reg, const Address& address);
    687   void xorq(CpuRegister dst, const Immediate& imm);
    688   void xorq(CpuRegister dst, CpuRegister src);
    689   void xorq(CpuRegister reg, const Address& address);
    690 
    691   void addl(CpuRegister dst, CpuRegister src);
    692   void addl(CpuRegister reg, const Immediate& imm);
    693   void addl(CpuRegister reg, const Address& address);
    694   void addl(const Address& address, CpuRegister reg);
    695   void addl(const Address& address, const Immediate& imm);
    696   void addw(const Address& address, const Immediate& imm);
    697 
    698   void addq(CpuRegister reg, const Immediate& imm);
    699   void addq(CpuRegister dst, CpuRegister src);
    700   void addq(CpuRegister dst, const Address& address);
    701 
    702   void subl(CpuRegister dst, CpuRegister src);
    703   void subl(CpuRegister reg, const Immediate& imm);
    704   void subl(CpuRegister reg, const Address& address);
    705 
    706   void subq(CpuRegister reg, const Immediate& imm);
    707   void subq(CpuRegister dst, CpuRegister src);
    708   void subq(CpuRegister dst, const Address& address);
    709 
    710   void cdq();
    711   void cqo();
    712 
    713   void idivl(CpuRegister reg);
    714   void idivq(CpuRegister reg);
    715 
    716   void imull(CpuRegister dst, CpuRegister src);
    717   void imull(CpuRegister reg, const Immediate& imm);
    718   void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
    719   void imull(CpuRegister reg, const Address& address);
    720 
    721   void imulq(CpuRegister src);
    722   void imulq(CpuRegister dst, CpuRegister src);
    723   void imulq(CpuRegister reg, const Immediate& imm);
    724   void imulq(CpuRegister reg, const Address& address);
    725   void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
    726 
    727   void imull(CpuRegister reg);
    728   void imull(const Address& address);
    729 
    730   void mull(CpuRegister reg);
    731   void mull(const Address& address);
    732 
    733   void shll(CpuRegister reg, const Immediate& imm);
    734   void shll(CpuRegister operand, CpuRegister shifter);
    735   void shrl(CpuRegister reg, const Immediate& imm);
    736   void shrl(CpuRegister operand, CpuRegister shifter);
    737   void sarl(CpuRegister reg, const Immediate& imm);
    738   void sarl(CpuRegister operand, CpuRegister shifter);
    739 
    740   void shlq(CpuRegister reg, const Immediate& imm);
    741   void shlq(CpuRegister operand, CpuRegister shifter);
    742   void shrq(CpuRegister reg, const Immediate& imm);
    743   void shrq(CpuRegister operand, CpuRegister shifter);
    744   void sarq(CpuRegister reg, const Immediate& imm);
    745   void sarq(CpuRegister operand, CpuRegister shifter);
    746 
    747   void negl(CpuRegister reg);
    748   void negq(CpuRegister reg);
    749 
    750   void notl(CpuRegister reg);
    751   void notq(CpuRegister reg);
    752 
    753   void enter(const Immediate& imm);
    754   void leave();
    755 
    756   void ret();
    757   void ret(const Immediate& imm);
    758 
    759   void nop();
    760   void int3();
    761   void hlt();
    762 
    763   void j(Condition condition, Label* label);
    764   void j(Condition condition, NearLabel* label);
    765   void jrcxz(NearLabel* label);
    766 
    767   void jmp(CpuRegister reg);
    768   void jmp(const Address& address);
    769   void jmp(Label* label);
    770   void jmp(NearLabel* label);
    771 
    772   X86_64Assembler* lock();
    773   void cmpxchgl(const Address& address, CpuRegister reg);
    774   void cmpxchgq(const Address& address, CpuRegister reg);
    775 
    776   void mfence();
    777 
    778   X86_64Assembler* gs();
    779 
    780   void setcc(Condition condition, CpuRegister dst);
    781 
    782   void bswapl(CpuRegister dst);
    783   void bswapq(CpuRegister dst);
    784 
    785   void bsfl(CpuRegister dst, CpuRegister src);
    786   void bsfl(CpuRegister dst, const Address& src);
    787   void bsfq(CpuRegister dst, CpuRegister src);
    788   void bsfq(CpuRegister dst, const Address& src);
    789 
    790   void bsrl(CpuRegister dst, CpuRegister src);
    791   void bsrl(CpuRegister dst, const Address& src);
    792   void bsrq(CpuRegister dst, CpuRegister src);
    793   void bsrq(CpuRegister dst, const Address& src);
    794 
    795   void popcntl(CpuRegister dst, CpuRegister src);
    796   void popcntl(CpuRegister dst, const Address& src);
    797   void popcntq(CpuRegister dst, CpuRegister src);
    798   void popcntq(CpuRegister dst, const Address& src);
    799 
    800   void rorl(CpuRegister reg, const Immediate& imm);
    801   void rorl(CpuRegister operand, CpuRegister shifter);
    802   void roll(CpuRegister reg, const Immediate& imm);
    803   void roll(CpuRegister operand, CpuRegister shifter);
    804 
    805   void rorq(CpuRegister reg, const Immediate& imm);
    806   void rorq(CpuRegister operand, CpuRegister shifter);
    807   void rolq(CpuRegister reg, const Immediate& imm);
    808   void rolq(CpuRegister operand, CpuRegister shifter);
    809 
    810   void repne_scasb();
    811   void repne_scasw();
    812   void repe_cmpsw();
    813   void repe_cmpsl();
    814   void repe_cmpsq();
    815   void rep_movsw();
    816 
    817   //
    818   // Macros for High-level operations.
    819   //
    820 
    821   void AddImmediate(CpuRegister reg, const Immediate& imm);
    822 
    823   void LoadDoubleConstant(XmmRegister dst, double value);
    824 
    825   void LockCmpxchgl(const Address& address, CpuRegister reg) {
    826     lock()->cmpxchgl(address, reg);
    827   }
    828 
    829   void LockCmpxchgq(const Address& address, CpuRegister reg) {
    830     lock()->cmpxchgq(address, reg);
    831   }
    832 
    833   //
    834   // Misc. functionality
    835   //
    836   int PreferredLoopAlignment() { return 16; }
    837   void Align(int alignment, int offset);
    838   void Bind(Label* label) OVERRIDE;
    839   void Jump(Label* label) OVERRIDE {
    840     jmp(label);
    841   }
    842   void Bind(NearLabel* label);
    843 
    844   // Add a double to the constant area, returning the offset into
    845   // the constant area where the literal resides.
    846   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
    847 
    848   // Add a float to the constant area, returning the offset into
    849   // the constant area where the literal resides.
    850   size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
    851 
    852   // Add an int32_t to the constant area, returning the offset into
    853   // the constant area where the literal resides.
    854   size_t AddInt32(int32_t v) {
    855     return constant_area_.AddInt32(v);
    856   }
    857 
    858   // Add an int32_t to the end of the constant area, returning the offset into
    859   // the constant area where the literal resides.
    860   size_t AppendInt32(int32_t v) {
    861     return constant_area_.AppendInt32(v);
    862   }
    863 
    864   // Add an int64_t to the constant area, returning the offset into
    865   // the constant area where the literal resides.
    866   size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
    867 
    868   // Add the contents of the constant area to the assembler buffer.
    869   void AddConstantArea();
    870 
    871   // Is the constant area empty? Return true if there are no literals in the constant area.
    872   bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
    873 
    874   // Return the current size of the constant area.
    875   size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
    876 
    877   //
    878   // Heap poisoning.
    879   //
    880 
    881   // Poison a heap reference contained in `reg`.
    882   void PoisonHeapReference(CpuRegister reg) { negl(reg); }
    883   // Unpoison a heap reference contained in `reg`.
    884   void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
    885   // Poison a heap reference contained in `reg` if heap poisoning is enabled.
    886   void MaybePoisonHeapReference(CpuRegister reg) {
    887     if (kPoisonHeapReferences) {
    888       PoisonHeapReference(reg);
    889     }
    890   }
    891   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
    892   void MaybeUnpoisonHeapReference(CpuRegister reg) {
    893     if (kPoisonHeapReferences) {
    894       UnpoisonHeapReference(reg);
    895     }
    896   }
    897 
    898  private:
    899   void EmitUint8(uint8_t value);
    900   void EmitInt32(int32_t value);
    901   void EmitInt64(int64_t value);
    902   void EmitRegisterOperand(uint8_t rm, uint8_t reg);
    903   void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
    904   void EmitFixup(AssemblerFixup* fixup);
    905   void EmitOperandSizeOverride();
    906 
    907   void EmitOperand(uint8_t rm, const Operand& operand);
    908   void EmitImmediate(const Immediate& imm, bool is_16_op = false);
    909   void EmitComplex(
    910       uint8_t rm, const Operand& operand, const Immediate& immediate, bool is_16_op = false);
    911   void EmitLabel(Label* label, int instruction_size);
    912   void EmitLabelLink(Label* label);
    913   void EmitLabelLink(NearLabel* label);
    914 
    915   void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
    916   void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
    917 
  // If any input is true, emit the necessary REX prefix.
    919   void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);
    920 
  // Emit a REX prefix byte if necessary for reg, i.e. if reg is a register in the range R8 to R15.
    922   void EmitOptionalRex32(CpuRegister reg);
    923   void EmitOptionalRex32(CpuRegister dst, CpuRegister src);
    924   void EmitOptionalRex32(XmmRegister dst, XmmRegister src);
    925   void EmitOptionalRex32(CpuRegister dst, XmmRegister src);
    926   void EmitOptionalRex32(XmmRegister dst, CpuRegister src);
    927   void EmitOptionalRex32(const Operand& operand);
    928   void EmitOptionalRex32(CpuRegister dst, const Operand& operand);
    929   void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
    930 
    931   // Emit a REX.W prefix plus necessary register bit encodings.
    932   void EmitRex64();
    933   void EmitRex64(CpuRegister reg);
    934   void EmitRex64(const Operand& operand);
    935   void EmitRex64(CpuRegister dst, CpuRegister src);
    936   void EmitRex64(CpuRegister dst, const Operand& operand);
    937   void EmitRex64(XmmRegister dst, const Operand& operand);
    938   void EmitRex64(XmmRegister dst, CpuRegister src);
    939   void EmitRex64(CpuRegister dst, XmmRegister src);
    940 
    941   // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
    942   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
    943   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
    944 
    945   ConstantArea constant_area_;
    946 
    947   DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
    948 };
    949 
    950 inline void X86_64Assembler::EmitUint8(uint8_t value) {
    951   buffer_.Emit<uint8_t>(value);
    952 }
    953 
    954 inline void X86_64Assembler::EmitInt32(int32_t value) {
    955   buffer_.Emit<int32_t>(value);
    956 }
    957 
    958 inline void X86_64Assembler::EmitInt64(int64_t value) {
    959   // Write this 64-bit value as two 32-bit words for alignment reasons
    960   // (this is essentially when running on ARM, which does not allow
    961   // 64-bit unaligned accesses).  We assume little-endianness here.
    962   EmitInt32(Low32Bits(value));
    963   EmitInt32(High32Bits(value));
    964 }
    965 
    966 inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
    967   CHECK_GE(rm, 0);
    968   CHECK_LT(rm, 8);
    969   buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
    970 }
    971 
    972 inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
    973   EmitRegisterOperand(rm, static_cast<uint8_t>(reg.AsFloatRegister()));
    974 }
    975 
    976 inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
    977   buffer_.EmitFixup(fixup);
    978 }
    979 
    980 inline void X86_64Assembler::EmitOperandSizeOverride() {
    981   EmitUint8(0x66);
    982 }
    983 
    984 }  // namespace x86_64
    985 }  // namespace art
    986 
    987 #endif  // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
    988